Python CSV

CSV (Comma-Separated Values) 是一種常見的表格資料格式。Python 的 csv 模組提供了讀寫 CSV 檔案的功能。

CSV 格式

name,age,city
Alice,25,Taipei
Bob,30,Tokyo
Charlie,35,Seoul

讀取 CSV

使用 csv.reader

import csv

with open("data.csv", "r", encoding="utf-8") as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

輸出:

['name', 'age', 'city']
['Alice', '25', 'Taipei']
['Bob', '30', 'Tokyo']
['Charlie', '35', 'Seoul']

跳過標題行

import csv

with open("data.csv", "r", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)  # 讀取標題行
    print(f"Header: {header}")
    
    for row in reader:
        print(row)

使用 csv.DictReader

將每一行讀取為字典:

import csv

with open("data.csv", "r", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row)

輸出:

{'name': 'Alice', 'age': '25', 'city': 'Taipei'}
{'name': 'Bob', 'age': '30', 'city': 'Tokyo'}
{'name': 'Charlie', 'age': '35', 'city': 'Seoul'}
# 存取特定欄位
with open("data.csv", "r", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(f"{row['name']} is {row['age']} years old")

寫入 CSV

使用 csv.writer

import csv

data = [
    ["name", "age", "city"],
    ["Alice", 25, "Taipei"],
    ["Bob", 30, "Tokyo"],
    ["Charlie", 35, "Seoul"]
]

with open("output.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    for row in data:
        writer.writerow(row)
Windows 上需要加上 newline="" 避免產生多餘的空行。

writerows() - 一次寫入多行

import csv

data = [
    ["name", "age", "city"],
    ["Alice", 25, "Taipei"],
    ["Bob", 30, "Tokyo"]
]

with open("output.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(data)

使用 csv.DictWriter

import csv

data = [
    {"name": "Alice", "age": 25, "city": "Taipei"},
    {"name": "Bob", "age": 30, "city": "Tokyo"},
    {"name": "Charlie", "age": 35, "city": "Seoul"}
]

with open("output.csv", "w", encoding="utf-8", newline="") as f:
    fieldnames = ["name", "age", "city"]
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    
    writer.writeheader()  # 寫入標題行
    writer.writerows(data)

分隔符號和引號

自訂分隔符號

import csv

# 使用 Tab 作為分隔符號
with open("data.tsv", "r") as f:
    reader = csv.reader(f, delimiter="\t")
    for row in reader:
        print(row)

# 使用分號作為分隔符號
with open("data.csv", "w", newline="") as f:
    writer = csv.writer(f, delimiter=";")
    writer.writerow(["name", "age", "city"])

引號處理

import csv

# 值包含逗號時需要引號
data = [
    ["name", "description"],
    ["Product A", "Red, large, heavy"],
    ["Product B", 'With "quotes"']
]

with open("output.csv", "w", newline="") as f:
    writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
    writer.writerows(data)

引號模式:

模式說明
csv.QUOTE_MINIMAL只在需要時加引號(預設)
csv.QUOTE_ALL所有欄位都加引號
csv.QUOTE_NONNUMERIC非數字欄位加引號
csv.QUOTE_NONE不加引號(需要設定 escapechar)

處理編碼

UTF-8 with BOM

某些程式(如 Excel)需要 BOM:

import csv

with open("data.csv", "w", encoding="utf-8-sig", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["姓名", "年齡", "城市"])
    writer.writerow(["小明", 25, "台北"])

讀取不同編碼

import csv

# 嘗試不同編碼
encodings = ["utf-8", "utf-8-sig", "big5", "gb2312"]

for encoding in encodings:
    try:
        with open("data.csv", "r", encoding=encoding) as f:
            reader = csv.reader(f)
            data = list(reader)
            print(f"Success with {encoding}")
            break
    except UnicodeDecodeError:
        continue

實際範例

統計 CSV 資料

import csv

def analyze_csv(filename):
    with open(filename, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        rows = list(reader)
    
    # 統計
    total = len(rows)
    if total == 0:
        return None
    
    # 假設有 age 欄位
    ages = [int(row["age"]) for row in rows if row.get("age")]
    avg_age = sum(ages) / len(ages) if ages else 0
    
    return {
        "total_rows": total,
        "average_age": round(avg_age, 2),
        "min_age": min(ages) if ages else None,
        "max_age": max(ages) if ages else None
    }

stats = analyze_csv("users.csv")
print(stats)

CSV 轉 JSON

import csv
import json

def csv_to_json(csv_file, json_file):
    with open(csv_file, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        data = list(reader)
    
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

csv_to_json("data.csv", "data.json")

JSON 轉 CSV

import csv
import json

def json_to_csv(json_file, csv_file):
    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)
    
    if not data:
        return
    
    fieldnames = data[0].keys()
    
    with open(csv_file, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

json_to_csv("data.json", "output.csv")

合併多個 CSV

import csv
from pathlib import Path

def merge_csv_files(input_dir, output_file):
    csv_files = list(Path(input_dir).glob("*.csv"))
    
    if not csv_files:
        return
    
    all_data = []
    fieldnames = None
    
    for csv_file in csv_files:
        with open(csv_file, "r", encoding="utf-8") as f:
            reader = csv.DictReader(f)
            if fieldnames is None:
                fieldnames = reader.fieldnames
            all_data.extend(list(reader))
    
    with open(output_file, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_data)

merge_csv_files("csv_folder", "merged.csv")

過濾 CSV 資料

import csv

def filter_csv(input_file, output_file, condition):
    """
    condition: 接受一個 row dict,回傳 True/False
    """
    with open(input_file, "r", encoding="utf-8") as f_in:
        reader = csv.DictReader(f_in)
        fieldnames = reader.fieldnames
        
        with open(output_file, "w", encoding="utf-8", newline="") as f_out:
            writer = csv.DictWriter(f_out, fieldnames=fieldnames)
            writer.writeheader()
            
            for row in reader:
                if condition(row):
                    writer.writerow(row)

# 使用:過濾年齡大於 25 的資料
filter_csv(
    "users.csv",
    "filtered.csv",
    lambda row: int(row.get("age", 0)) > 25
)

使用 pandas(推薦用於複雜操作)

對於複雜的 CSV 操作,pandas 函式庫更方便:

import pandas as pd

# 讀取 CSV
df = pd.read_csv("data.csv")

# 查看資料
print(df.head())

# 過濾
filtered = df[df["age"] > 25]

# 統計
print(df["age"].mean())

# 寫入 CSV
df.to_csv("output.csv", index=False)