1. JSON文件处理
读取JSON文件
import json
# Approach 1: parse JSON straight from an open file.
# The `with` block closes the file even if parsing raises.
with open('data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Approach 2: parse JSON from a string already held in memory.
json_string = '{"name": "张三", "age": 25}'
data = json.loads(json_string)
写入JSON文件
import json
# Sample record that both approaches below serialize.
data = {
    "name": "李四",
    "age": 30,
    "city": "北京",
    "skills": ["Python", "Java", "SQL"]
}

# Approach 1: serialize directly into a file.
# ensure_ascii=False keeps the Chinese text readable instead of \uXXXX escapes.
with open('output.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

# Approach 2: serialize to a string with the same formatting options.
json_string = json.dumps(data, ensure_ascii=False, indent=2)
处理复杂JSON示例
import json
# Load the nested JSON document. The encoding is given explicitly so the
# read does not depend on the platform default and matches the UTF-8 write below.
with open('complex_data.json', 'r', encoding='utf-8') as f:
    complex_data = json.load(f)

# Walk the nested structure: company -> employees -> per-employee fields.
employees = complex_data['company']['employees']
for emp in employees:
    print(f"姓名: {emp['name']}, 职位: {emp['position']}")

# Modify a nested value in place.
complex_data['company']['location'] = '上海'

# Save the modified document to a separate file, leaving the input untouched.
with open('updated_data.json', 'w', encoding='utf-8') as f:
    json.dump(complex_data, f, ensure_ascii=False, indent=2)
2. CSV文件处理
使用csv模块
import csv
# Read the CSV row by row; each row is a list of strings.
# newline='' lets the csv module do its own newline handling, so line
# breaks embedded in quoted fields are preserved.
with open('data.csv', 'r', encoding='utf-8-sig', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

# DictReader uses the first row as the header and yields one dict per row.
with open('data.csv', 'r', encoding='utf-8-sig', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(f"姓名: {row['name']}, 年龄: {row['age']}")
# Write a CSV from a list of rows; the first row acts as the header.
data = [
    ['姓名', '年龄', '城市'],
    ['张三', '25', '北京'],
    ['李四', '30', '上海'],
    ['王五', '28', '广州']
]
# newline='' stops the text layer from translating the writer's own line endings.
with open('output.csv', 'w', encoding='utf-8-sig', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(data)

# Write a CSV from a list of dicts with DictWriter.
# fieldnames fixes both the header text and the column order.
fieldnames = ['name', 'age', 'city']
data_dict = [
    {'name': '张三', 'age': 25, 'city': '北京'},
    {'name': '李四', 'age': 30, 'city': '上海'}
]
with open('dict_output.csv', 'w', encoding='utf-8-sig', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data_dict)
使用pandas处理CSV(推荐)
import pandas as pd
# Load the CSV into a DataFrame.
df = pd.read_csv('data.csv', encoding='utf-8-sig')

# Inspect the data.
print(df.head())  # first 5 rows
# DataFrame.info() prints its summary itself and returns None, so wrapping
# it in print() would emit a stray "None" line.
df.info()
print(df.describe())  # summary statistics

# Data processing
# Keep only the rows whose age is above 25.
filtered = df[df['age'] > 25]

# Add a categorical column derived from age.
# NOTE(review): with pandas' default right-closed bins these are (0, 20],
# (20, 30], (30, 40]; ages outside that range get NaN — confirm that is intended.
df['age_group'] = pd.cut(df['age'], bins=[0, 20, 30, 40], labels=['青年', '中年', '壮年'])

# Mean age per city.
grouped = df.groupby('city')['age'].mean()

# Write the processed frame back out without the index column.
df.to_csv('processed_data.csv', index=False, encoding='utf-8-sig')
3. JSON和CSV相互转换
JSON转CSV
import json
import csv
# Load the JSON document.
with open('data.json', 'r', encoding='utf-8') as f:
    json_data = json.load(f)

# The conversion assumes a list of dicts (one dict per CSV row).
# An empty list has no keys to build a header from, so it is skipped
# instead of failing on json_data[0].
if isinstance(json_data, list) and json_data:
    # Header = union of the keys of all records, in first-seen order, so
    # records carrying extra keys do not make DictWriter raise ValueError.
    fieldnames = list(dict.fromkeys(key for record in json_data for key in record))
    with open('json_to_csv.csv', 'w', encoding='utf-8-sig', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(json_data)
CSV转JSON
import csv
import json
# Read every CSV row as a dict keyed by the header row.
# newline='' lets the csv module handle line breaks inside quoted fields.
with open('data.csv', 'r', encoding='utf-8-sig', newline='') as f:
    reader = csv.DictReader(f)
    csv_data = list(reader)

# Save the rows as a JSON array of objects.
with open('csv_to_json.json', 'w', encoding='utf-8') as f:
    json.dump(csv_data, f, ensure_ascii=False, indent=2)
使用pandas转换
import pandas as pd
# CSV -> JSON
# utf-8-sig matches the encoding used by the other CSV reads and writes in
# this file and tolerates a leading BOM.
df = pd.read_csv('data.csv', encoding='utf-8-sig')
# orient='records' emits one JSON object per row; force_ascii=False keeps
# non-ASCII text unescaped.
df.to_json('output.json', orient='records', force_ascii=False, indent=2)

# JSON -> CSV
df = pd.read_json('data.json')
df.to_csv('output.csv', index=False, encoding='utf-8-sig')
4. 实用函数封装
import json
import csv
import pandas as pd
from typing import List, Dict, Any
class FileProcessor:
    """Stateless helpers for reading, writing and converting JSON and CSV files.

    Every method is a static method, so they can be called on the class
    itself or on an instance. JSON files are read and written as UTF-8;
    CSV files use utf-8-sig.
    """

    @staticmethod
    def read_json(filepath: str) -> Any:
        """Parse the JSON file at *filepath* and return the decoded object."""
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def write_json(data: Any, filepath: str, indent: int = 2) -> None:
        """Serialize *data* as JSON into *filepath*.

        Non-ASCII characters are written as-is (``ensure_ascii=False``);
        *indent* is passed through to ``json.dump``.
        """
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=indent)

    @staticmethod
    def read_csv(filepath: str) -> List[Dict]:
        """Read a CSV file into a list of dicts keyed by its header row.

        All values come back as strings, exactly as ``csv.DictReader``
        produces them.
        """
        # newline='' lets the csv module do its own newline handling, so
        # line breaks embedded in quoted fields survive a round trip.
        with open(filepath, 'r', encoding='utf-8-sig', newline='') as f:
            return list(csv.DictReader(f))

    @staticmethod
    def write_csv(data: List[Dict], filepath: str) -> None:
        """Write a list of dicts to *filepath* as CSV.

        The header is the union of the keys of all rows, in first-seen
        order; a row that lacks a column gets an empty cell. When *data*
        is empty nothing is written and no file is created.
        """
        if not data:
            return
        # Taking the keys of the first row only would make DictWriter raise
        # ValueError as soon as a later row carries an extra key.
        fieldnames = list(dict.fromkeys(key for row in data for key in row))
        with open(filepath, 'w', encoding='utf-8-sig', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)

    @staticmethod
    def json_to_csv(json_file: str, csv_file: str) -> None:
        """Convert a JSON file holding a list of objects into a CSV file.

        Raises:
            ValueError: If the JSON document is not a list, or if any
                element of the list is not a dict.
        """
        data = FileProcessor.read_json(json_file)
        if not isinstance(data, list):
            raise ValueError("JSON数据必须是列表格式")
        # Only dicts map onto CSV rows; reject anything else up front with
        # the same exception type callers already handle.
        if not all(isinstance(row, dict) for row in data):
            raise ValueError("JSON列表中的每个元素必须是字典")
        FileProcessor.write_csv(data, csv_file)

    @staticmethod
    def csv_to_json(csv_file: str, json_file: str) -> None:
        """Convert a CSV file into a JSON array with one object per row."""
        data = FileProcessor.read_csv(csv_file)
        FileProcessor.write_json(data, json_file)
# Usage example
# FileProcessor's methods are all static, so calling them through an
# instance works the same as calling them on the class.
processor = FileProcessor()
# Read files
json_data = processor.read_json('data.json')
csv_data = processor.read_csv('data.csv')
# Convert between formats
processor.json_to_csv('input.json', 'output.csv')
processor.csv_to_json('input.csv', 'output.json')
5. 处理特殊情况的技巧
处理大文件
# Stream a large JSON Lines file one record at a time.
def read_large_json(filepath):
    """Yield one decoded JSON value per non-blank line of a JSON Lines file.

    The file is read lazily, line by line, so only one record is decoded
    at a time.

    Args:
        filepath: Path of a UTF-8 file with one JSON document per line.

    Yields:
        The object decoded from each line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            record = line.strip()
            # Blank lines (for example a trailing empty line) are not JSON
            # documents; skip them instead of failing in json.loads.
            if record:
                yield json.loads(record)
# Read a large CSV in chunks: with chunksize set, read_csv yields
# DataFrames of at most chunk_size rows instead of one big frame.
chunk_size = 1000
for chunk in pd.read_csv('large_data.csv', chunksize=chunk_size):
    # process_chunk is a placeholder that this snippet does not define;
    # supply your own per-chunk handler before running it.
    process_chunk(chunk)
处理编码问题
# 自动检测编码
import chardet
def detect_encoding(filepath, sample_size=-1):
    """Guess the text encoding of a file with chardet.

    Args:
        filepath: Path of the file to inspect; it is opened in binary mode.
        sample_size: Number of leading bytes handed to chardet. The default
            of -1 reads the whole file, as this function always has; pass a
            positive value to bound memory use on large files.

    Returns:
        The value of the ``'encoding'`` entry of chardet's detection result.
        NOTE(review): chardet may report None when it cannot decide (for
        example on empty input) — confirm and handle that at the call site.
    """
    with open(filepath, 'rb') as f:
        raw = f.read(sample_size)
    result = chardet.detect(raw)
    return result['encoding']
# Detect the file's encoding first, then pass it to pandas when loading.
encoding = detect_encoding('data.csv')
df = pd.read_csv('data.csv', encoding=encoding)
处理缺失值
# Declare which strings count as missing values while reading.
# NOTE(review): these markers look like they overlap pandas' built-in NA
# strings — confirm whether the explicit list is needed.
df = pd.read_csv('data.csv', na_values=['NA', 'N/A', 'null', ''])
# Fill or drop missing values; both calls return a new DataFrame.
df_filled = df.fillna(0) # replace missing values with 0
df_dropped = df.dropna() # drop every row that contains a missing value
最佳实践建议
1. 统一编码:建议统一使用UTF-8编码(需要用Excel打开的CSV可使用utf-8-sig)
2. 异常处理:文件操作应使用try-except捕获具体异常(如FileNotFoundError、json.JSONDecodeError)
3. 上下文管理:使用with语句确保文件正确关闭
4. 数据验证:在处理前验证数据格式
5. 备份原始数据:重要数据处理前先备份
这些方法应该能满足大多数JSON和CSV文件处理的需求。根据具体场景选择合适的方法:简单场景用标准库,复杂数据处理用pandas。