一文带你深入理解Python中zip的用法-华金科技

一、zip函数基础

zip() 是Python内置函数，用于将多个可迭代对象（列表、元组、字符串等）的对应元素依次打包成元组；在Python 3中它返回的是一个惰性迭代器，而不是列表。

1. 基本用法

# Two parallel lists to combine
names = ["Alice", "Bob", "Charlie"]
ages = [25, 30, 35]

# zip() hands back a zip object; unpack it into a list to see the pairs
zipped = zip(names, ages)
print([*zipped])  # [('Alice', 25), ('Bob', 30), ('Charlie', 35)]

# The pairs can also be consumed directly in a loop and unpacked there
for pair in zip(names, ages):
    name, age = pair
    print(f"{name} is {age} years old")

2. 不等长处理

# By default zip stops as soon as the shortest input runs out
list1 = [1, 2, 3]
list2 = ["a", "b"]
print([*zip(list1, list2)])  # [(1, 'a'), (2, 'b')]

# itertools.zip_longest() keeps going and pads the shorter inputs instead
from itertools import zip_longest
result = zip_longest(list1, list2, fillvalue="N/A")
print([*result])  # [(1, 'a'), (2, 'b'), (3, 'N/A')]

二、进阶技巧与应用场景

1. 矩阵转置

# Transpose a 2-D matrix
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]

# Unpacking the rows into zip() groups the i-th item of every row,
# which is exactly the i-th column.
transposed = [*zip(*matrix)]
print(transposed)  # [(1, 4, 7), (2, 5, 8), (3, 6, 9)]

2. 字典创建与合并

# Build a dict from a list of keys and a parallel list of values
keys = ["name", "age", "city"]
values = ["Alice", 25, "New York"]
person = {key: value for key, value in zip(keys, values)}
print(person)  # {'name': 'Alice', 'age': 25, 'city': 'New York'}

# Merge two dicts
dict1 = {"a": 1, "b": 2}
dict2 = {"c": 3, "d": 4}
# Note: dict(zip(dict1.keys(), dict2.values())) does NOT merge; it re-labels
# dict2's values with dict1's keys and yields {'a': 3, 'b': 4}. A real merge
# keeps every key/value pair of both dicts (dict2 wins on duplicate keys).
combined = {**dict1, **dict2}  # {'a': 1, 'b': 2, 'c': 3, 'd': 4}

3. 并行迭代与计算

# Compute across several parallel lists at once
prices = [100, 200, 150]
quantities = [2, 3, 1]
discounts = [0.1, 0.2, 0.05]

# One discounted line total per (price, quantity, discount) triple
line_totals = (
    price * qty * (1 - disc)
    for price, qty, disc in zip(prices, quantities, discounts)
)
total_revenue = sum(line_totals)
print(total_revenue)  # total revenue after discounts

三、高级用法与注意事项

1. zip对象的特点

# In Python 3 zip returns an iterator, so it can be consumed only once
z = zip([1, 2], ["a", "b"])
print([*z])  # [(1, 'a'), (2, 'b')]
print([*z])  # [] <- empty on the second pass!

# Materialise the pairs as a list first when they are needed repeatedly
z = [*zip([1, 2], ["a", "b"])]

2. 与enumerate结合使用

# Parallel iteration with a running index
names = ["Alice", "Bob", "Charlie"]
scores = [85, 92, 78]

# enumerate() numbers the (name, score) pairs produced by zip(), from 0
for i, pair in enumerate(zip(names, scores)):
    name, score = pair
    print(f"Rank {i+1}: {name} - {score} points")

3. 解压（逆操作）

# Unzip with the * operator
zipped = [("Alice", 25), ("Bob", 30), ("Charlie", 35)]
# Spreading the pairs into zip() regroups them by position:
# first all the names, then all the ages.
unzipped = list(zip(*zipped))
names, ages = unzipped
print(names)  # ('Alice', 'Bob', 'Charlie')
print(ages)   # (25, 30, 35)

四、性能对比与最佳实践

1. 性能对比

import timeit

# Traditional for loop vs zip
def traditional_loop(size=1000000):
    """Pair up two equal ranges with an explicit index loop.

    This is the baseline of the benchmark, so the index-based loop is kept
    on purpose; it is the pattern that zip() is being compared against.

    Args:
        size: Number of elements in each input range. Defaults to 1000000,
            the workload used by the benchmark.

    Returns:
        A list of ``(i, i)`` tuples for ``i`` in ``range(size)``.
    """
    list1 = range(size)
    list2 = range(size)
    result = []
    # Stop at the shorter input, mirroring what zip() does.
    for i in range(min(len(list1), len(list2))):
        result.append((list1[i], list2[i]))
    return result

def zip_method(size=1000000):
    """Pair up two equal ranges with zip().

    Args:
        size: Number of elements in each input range. Defaults to 1000000,
            the workload used by the benchmark.

    Returns:
        A list of ``(i, i)`` tuples for ``i`` in ``range(size)``.
    """
    list1 = range(size)
    list2 = range(size)
    return list(zip(list1, list2))

# Time 10 runs of each implementation, index loop first and zip second;
# the zip version is expected to report the smaller number.
for benchmark in (traditional_loop, zip_method):
    print(timeit.timeit(benchmark, number=10))

2. 最佳实践

# 1. 使用类型提示提高可读性
from typing import List, Tuple

def pair_data(ids: List[int], names: List[str]) -> List[Tuple[int, str]]:
    """Pair each ID with the name found at the same position."""
    return [(item_id, name) for item_id, name in zip(ids, names)]

# 2. 处理不等长数据的防御性编程
def safe_zip(*iterables, strict=False):
    """
    安全的zip函数
    strict=True: 长度不等时抛出ValueError
    """
    if strict:
        return zip(*iterables, strict=True)  # Python 3.10+
    return zip(*iterables)

# 3. Chained processing example
data1 = [1, 2, 3]
data2 = ["a", "b", "c"]
data3 = [True, False, True]

# Filter, pair and map: keep the (number, letter) pairs whose flag is truthy,
# then double the number and upper-case the letter.
selected = (
    (num, char)
    for num, char, flag in zip(data1, data2, data3)
    if flag
)
result = [(num * 2, char.upper()) for num, char in selected]

五、实际应用案例

案例1：数据清洗与对齐

# Align the columns of CSV data
def align_columns(csv_rows):
    """Pad ragged rows to a common width and return the data column by column.

    Args:
        csv_rows: Iterable of rows, each an iterable of cell values. Rows may
            have different lengths.

    Returns:
        A list of tuples, one per column. A row too short to reach a given
        column contributes an empty string there. The list is empty when
        there are no rows or every row is empty.
    """
    # Function-scope import so this snippet works on its own.
    from itertools import zip_longest

    # zip_longest transposes and pads in one pass. Unlike the max()-based
    # padding it does not fail on an empty row list and needs neither len()
    # nor a second pass over the rows.
    return list(zip_longest(*csv_rows, fillvalue=""))

案例2：分组数据处理

# Process data in batches
def batch_process(data, batch_size=3):
    """Yield consecutive batches of ``data`` as tuples.

    Args:
        data: Any iterable.
        batch_size: Maximum number of items per batch.

    Yields:
        Tuples of up to ``batch_size`` consecutive items. The last tuple is
        shorter when the input length is not a multiple of ``batch_size``.
        Nothing is yielded for empty input or a non-positive ``batch_size``.
    """
    # Function-scope import so this snippet works on its own.
    from itertools import islice

    if batch_size < 1:
        return
    it = iter(data)
    # zip(*[it] * batch_size) would build all full batches in one go and
    # silently drop the leftover items; islice pulls one batch at a time and
    # keeps the short tail.
    while batch := tuple(islice(it, batch_size)):
        yield batch

# Usage example
data = range(10)
for batch in batch_process(data, batch_size=3):
    print(batch)  # (0, 1, 2), (3, 4, 5), (6, 7, 8), (9,)

案例3：多维度排序

# Sort by several keys
students = [
    {"name": "Alice", "math": 85, "english": 90},
    {"name": "Bob", "math": 92, "english": 85},
    {"name": "Charlie", "math": 85, "english": 95},
]

# Math score descending first, then English score descending
math_scores = [student["math"] for student in students]
english_scores = [student["english"] for student in students]

# zip builds one composite (math, english) key per student; the student
# positions are then ordered by that key, highest first.
composite_keys = list(zip(math_scores, english_scores))
ranking = sorted(
    range(len(students)),
    key=composite_keys.__getitem__,
    reverse=True,
)
sorted_students = [students[position] for position in ranking]