上海华金科技

Python3处理json文件和csv文件

2026-03-25 15:53:02 浏览次数:1
详细信息

1. JSON文件处理

读取JSON文件

import json

# Approach 1: parse JSON straight from a file on disk
with open('data.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Approach 2: parse JSON held in an in-memory string
json_string = '{"name": "张三", "age": 25}'
data = json.loads(json_string)

写入JSON文件

import json

# Sample record to serialise
data = {
    "name": "李四",
    "age": 30,
    "city": "北京",
    "skills": ["Python", "Java", "SQL"],
}

# Formatting shared by both approaches: keep non-ASCII text readable
# (no \uXXXX escapes) and indent nested levels by two spaces.
dump_options = {"ensure_ascii": False, "indent": 2}

# Approach 1: serialise straight into a file
with open('output.json', 'w', encoding='utf-8') as out_file:
    json.dump(data, out_file, **dump_options)

# Approach 2: serialise to a string
json_string = json.dumps(data, **dump_options)

处理复杂JSON示例

import json

# Load a nested JSON document. The encoding is passed explicitly so decoding
# does not depend on the platform's default locale encoding (the file holds
# non-ASCII text and is written back as UTF-8 below).
with open('complex_data.json', 'r', encoding='utf-8') as f:
    complex_data = json.load(f)

# Walk into the nested structure
employees = complex_data['company']['employees']
for emp in employees:
    print(f"姓名: {emp['name']}, 职位: {emp['position']}")

# Modify a nested value in place
complex_data['company']['location'] = '上海'

# Save the modified document to a new file
with open('updated_data.json', 'w', encoding='utf-8') as f:
    json.dump(complex_data, f, ensure_ascii=False, indent=2)

2. CSV文件处理

使用csv模块

import csv

# Read a CSV file row by row; each row is a list of strings.
# newline='' hands line-ending handling to the csv module, so quoted fields
# that contain line breaks are parsed correctly.
with open('data.csv', 'r', encoding='utf-8-sig', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

# DictReader: the header row supplies the keys of each row dict
with open('data.csv', 'r', encoding='utf-8-sig', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(f"姓名: {row['name']}, 年龄: {row['age']}")

# Rows for the plain writer; the first row acts as the header
data = [
    ['姓名', '年龄', '城市'],
    ['张三', '25', '北京'],
    ['李四', '30', '上海'],
    ['王五', '28', '广州'],
]

with open('output.csv', 'w', encoding='utf-8-sig', newline='') as out_file:
    csv.writer(out_file).writerows(data)

# Dict rows for DictWriter; fieldnames fixes the column order
fieldnames = ['name', 'age', 'city']
data_dict = [
    {'name': '张三', 'age': 25, 'city': '北京'},
    {'name': '李四', 'age': 30, 'city': '上海'},
]

with open('dict_output.csv', 'w', encoding='utf-8-sig', newline='') as out_file:
    dict_writer = csv.DictWriter(out_file, fieldnames=fieldnames)
    dict_writer.writeheader()
    for record in data_dict:
        dict_writer.writerow(record)

使用pandas处理CSV(推荐)

import pandas as pd

# Load the CSV into a DataFrame
df = pd.read_csv('data.csv', encoding='utf-8-sig')

# Inspect the data
print(df.head())  # first 5 rows
df.info()  # prints its own summary and returns None, so it is not wrapped in print()
print(df.describe())  # descriptive statistics

# Data processing
# Keep only the rows whose age is above 25
filtered = df[df['age'] > 25]

# Add a derived column; ages outside the (0, 40] bins get NaN
df['age_group'] = pd.cut(df['age'], bins=[0, 20, 30, 40], labels=['青年', '中年', '壮年'])

# Mean age per city
grouped = df.groupby('city')['age'].mean()

# Write the result back to CSV without the index column
df.to_csv('processed_data.csv', index=False, encoding='utf-8-sig')

3. JSON和CSV相互转换

JSON转CSV

import json
import csv

# Load the JSON document
with open('data.json', 'r', encoding='utf-8') as f:
    json_data = json.load(f)

# The conversion assumes a list of dicts (one dict per CSV row).
# An empty list is skipped: there is no record to take a header from.
if isinstance(json_data, list) and json_data:
    # Collect the keys of every record, in first-seen order, as the header.
    # Using only the first record would make DictWriter raise ValueError as
    # soon as a later record carries an extra key.
    fieldnames = list(dict.fromkeys(key for record in json_data for key in record))

    with open('json_to_csv.csv', 'w', encoding='utf-8-sig', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        # Keys missing from a record are written as empty cells (DictWriter's default restval)
        writer.writerows(json_data)

CSV转JSON

import csv
import json

# Read every CSV row as a dict keyed by the header row.
# newline='' hands line-ending handling to the csv module, so quoted fields
# that contain line breaks are parsed correctly.
with open('data.csv', 'r', encoding='utf-8-sig', newline='') as f:
    csv_data = list(csv.DictReader(f))

# Save the rows as a JSON array of objects
with open('csv_to_json.json', 'w', encoding='utf-8') as f:
    json.dump(csv_data, f, ensure_ascii=False, indent=2)

使用pandas转换

import pandas as pd

# CSV -> JSON: one JSON object per row. The explicit encoding matches the
# other CSV reads and writes in this document.
df = pd.read_csv('data.csv', encoding='utf-8-sig')
df.to_json('output.json', orient='records', force_ascii=False, indent=2)

# JSON -> CSV, written without the index column
df = pd.read_json('data.json')
df.to_csv('output.csv', index=False, encoding='utf-8-sig')

4. 实用函数封装

import json
import csv
import pandas as pd
from typing import List, Dict, Any

class FileProcessor:
    """Stateless helpers for reading, writing and converting JSON and CSV files.

    Every method is a static method, so they can be called on the class
    itself or on an instance. JSON files are handled as UTF-8; CSV files use
    ``utf-8-sig`` (a BOM is written on output and skipped on input).
    """

    @staticmethod
    def read_json(filepath: str) -> Any:
        """Return the object decoded from the UTF-8 JSON file at *filepath*."""
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def write_json(data: Any, filepath: str, indent: int = 2):
        """Serialise *data* as JSON into *filepath*.

        Non-ASCII characters are written verbatim (``ensure_ascii=False``);
        *indent* is forwarded to ``json.dump``.
        """
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=indent)

    @staticmethod
    def read_csv(filepath: str) -> List[Dict]:
        """Return the rows of the CSV file at *filepath* as a list of dicts.

        The first row is used as the header; all values are strings.
        """
        # newline='' hands line-ending handling to the csv module, so quoted
        # fields containing line breaks are returned unchanged.
        with open(filepath, 'r', encoding='utf-8-sig', newline='') as f:
            return list(csv.DictReader(f))

    @staticmethod
    def write_csv(data: List[Dict], filepath: str):
        """Write a list of dicts to *filepath* as CSV with a header row.

        The header is the union of the keys of all rows, in first-seen order;
        a row that lacks a key gets an empty cell for it. When *data* is
        empty nothing is written and no file is created.
        """
        if not data:
            return

        # Taking the keys of data[0] alone would make DictWriter raise
        # ValueError as soon as a later row carries an extra key.
        fieldnames = list(dict.fromkeys(key for row in data for key in row))
        with open(filepath, 'w', encoding='utf-8-sig', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)

    @staticmethod
    def json_to_csv(json_file: str, csv_file: str):
        """Convert a JSON file holding a list of objects into a CSV file.

        Raises:
            ValueError: if the top-level JSON value is not a list.
        """
        data = FileProcessor.read_json(json_file)
        if not isinstance(data, list):
            raise ValueError("JSON数据必须是列表格式")
        FileProcessor.write_csv(data, csv_file)

    @staticmethod
    def csv_to_json(csv_file: str, json_file: str):
        """Convert a CSV file into a JSON array with one object per row."""
        data = FileProcessor.read_csv(csv_file)
        FileProcessor.write_json(data, json_file)

# Example usage (FileProcessor's methods are static; the instance is just a handle)
processor = FileProcessor()

# Load both formats
json_data = processor.read_json(filepath='data.json')
csv_data = processor.read_csv(filepath='data.csv')

# Convert in both directions
processor.json_to_csv(json_file='input.json', csv_file='output.csv')
processor.csv_to_json(csv_file='input.csv', json_file='output.json')

5. 处理特殊情况的技巧

处理大文件

# Read a large JSON file line by line (JSON Lines format)
def read_large_json(filepath):
    """Lazily yield one decoded object per line of a UTF-8 JSON Lines file.

    Lines that are empty or contain only whitespace are skipped, so blank
    separator lines do not raise ``json.JSONDecodeError``. A line holding
    malformed JSON still raises.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # json.loads('') would raise; a blank line carries no record
                continue
            yield json.loads(line)

# Stream a large CSV in fixed-size pieces instead of loading it whole
chunk_size = 1000
chunk_reader = pd.read_csv('large_data.csv', chunksize=chunk_size)
for chunk in chunk_reader:
    process_chunk(chunk)  # handle one block of rows at a time

处理编码问题

# 自动检测编码
import chardet

def detect_encoding(filepath):
    """Return the encoding chardet reports for the file at *filepath*.

    The whole file is read into memory as bytes and passed to
    ``chardet.detect``; the ``'encoding'`` entry of its result is returned.
    """
    with open(filepath, 'rb') as raw_file:
        raw_bytes = raw_file.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']

# Detect the file's encoding first, then load it with that encoding
csv_path = 'data.csv'
encoding = detect_encoding(csv_path)
df = pd.read_csv(csv_path, encoding=encoding)

处理缺失值

# Strings to be treated as missing values while reading
missing_markers = ['NA', 'N/A', 'null', '']
df = pd.read_csv('data.csv', na_values=missing_markers)

# Either fill the gaps or drop the affected rows
df_filled = df.fillna(0)  # replace missing values with 0
df_dropped = df.dropna()  # drop rows that contain any missing value

最佳实践建议

1. 统一编码:建议统一使用UTF-8编码。
2. 异常处理:总是添加try-except处理文件操作。
3. 上下文管理:使用with语句确保文件正确关闭。
4. 数据验证:在处理前验证数据格式。
5. 备份原始数据:重要数据处理前先备份。

这些方法应该能满足大多数JSON和CSV文件处理的需求。根据具体场景选择合适的方法:简单场景用标准库,复杂数据处理用pandas。

相关推荐