Python CSV
CSV (Comma-Separated Values) 是一種常見的表格資料格式。Python 的 csv 模組提供了讀寫 CSV 檔案的功能。
CSV 格式
name,age,city
Alice,25,Taipei
Bob,30,Tokyo
Charlie,35,Seoul
讀取 CSV
使用 csv.reader
import csv
with open("data.csv", "r", encoding="utf-8") as f:
reader = csv.reader(f)
for row in reader:
print(row)
輸出:
['name', 'age', 'city']
['Alice', '25', 'Taipei']
['Bob', '30', 'Tokyo']
['Charlie', '35', 'Seoul']
跳過標題行
import csv
with open("data.csv", "r", encoding="utf-8") as f:
reader = csv.reader(f)
header = next(reader) # 讀取標題行
print(f"Header: {header}")
for row in reader:
print(row)
使用 csv.DictReader
將每一行讀取為字典:
import csv
with open("data.csv", "r", encoding="utf-8") as f:
reader = csv.DictReader(f)
for row in reader:
print(row)
輸出:
{'name': 'Alice', 'age': '25', 'city': 'Taipei'}
{'name': 'Bob', 'age': '30', 'city': 'Tokyo'}
{'name': 'Charlie', 'age': '35', 'city': 'Seoul'}
# 存取特定欄位
with open("data.csv", "r", encoding="utf-8") as f:
reader = csv.DictReader(f)
for row in reader:
print(f"{row['name']} is {row['age']} years old")
寫入 CSV
使用 csv.writer
import csv
data = [
["name", "age", "city"],
["Alice", 25, "Taipei"],
["Bob", 30, "Tokyo"],
["Charlie", 35, "Seoul"]
]
with open("output.csv", "w", encoding="utf-8", newline="") as f:
writer = csv.writer(f)
for row in data:
writer.writerow(row)
Windows 上需要加上
newline="" 避免產生多餘的空行。writerows() - 一次寫入多行
import csv
data = [
["name", "age", "city"],
["Alice", 25, "Taipei"],
["Bob", 30, "Tokyo"]
]
with open("output.csv", "w", encoding="utf-8", newline="") as f:
writer = csv.writer(f)
writer.writerows(data)
使用 csv.DictWriter
import csv
data = [
{"name": "Alice", "age": 25, "city": "Taipei"},
{"name": "Bob", "age": 30, "city": "Tokyo"},
{"name": "Charlie", "age": 35, "city": "Seoul"}
]
with open("output.csv", "w", encoding="utf-8", newline="") as f:
fieldnames = ["name", "age", "city"]
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader() # 寫入標題行
writer.writerows(data)
分隔符號和引號
自訂分隔符號
import csv
# 使用 Tab 作為分隔符號
with open("data.tsv", "r") as f:
reader = csv.reader(f, delimiter="\t")
for row in reader:
print(row)
# 使用分號作為分隔符號
with open("data.csv", "w", newline="") as f:
writer = csv.writer(f, delimiter=";")
writer.writerow(["name", "age", "city"])
引號處理
import csv
# 值包含逗號時需要引號
data = [
["name", "description"],
["Product A", "Red, large, heavy"],
["Product B", 'With "quotes"']
]
with open("output.csv", "w", newline="") as f:
writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
writer.writerows(data)
引號模式:
| 模式 | 說明 |
|---|---|
csv.QUOTE_MINIMAL | 只在需要時加引號(預設) |
csv.QUOTE_ALL | 所有欄位都加引號 |
csv.QUOTE_NONNUMERIC | 非數字欄位加引號 |
csv.QUOTE_NONE | 不加引號(需要設定 escapechar) |
處理編碼
UTF-8 with BOM
某些程式(如 Excel)需要 BOM:
import csv
with open("data.csv", "w", encoding="utf-8-sig", newline="") as f:
writer = csv.writer(f)
writer.writerow(["姓名", "年齡", "城市"])
writer.writerow(["小明", 25, "台北"])
讀取不同編碼
import csv
# 嘗試不同編碼
encodings = ["utf-8", "utf-8-sig", "big5", "gb2312"]
for encoding in encodings:
try:
with open("data.csv", "r", encoding=encoding) as f:
reader = csv.reader(f)
data = list(reader)
print(f"Success with {encoding}")
break
except UnicodeDecodeError:
continue
實際範例
統計 CSV 資料
import csv
def analyze_csv(filename):
with open(filename, "r", encoding="utf-8") as f:
reader = csv.DictReader(f)
rows = list(reader)
# 統計
total = len(rows)
if total == 0:
return None
# 假設有 age 欄位
ages = [int(row["age"]) for row in rows if row.get("age")]
avg_age = sum(ages) / len(ages) if ages else 0
return {
"total_rows": total,
"average_age": round(avg_age, 2),
"min_age": min(ages) if ages else None,
"max_age": max(ages) if ages else None
}
stats = analyze_csv("users.csv")
print(stats)
CSV 轉 JSON
import csv
import json
def csv_to_json(csv_file, json_file):
with open(csv_file, "r", encoding="utf-8") as f:
reader = csv.DictReader(f)
data = list(reader)
with open(json_file, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, ensure_ascii=False)
csv_to_json("data.csv", "data.json")
JSON 轉 CSV
import csv
import json
def json_to_csv(json_file, csv_file):
with open(json_file, "r", encoding="utf-8") as f:
data = json.load(f)
if not data:
return
fieldnames = data[0].keys()
with open(csv_file, "w", encoding="utf-8", newline="") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(data)
json_to_csv("data.json", "output.csv")
合併多個 CSV
import csv
from pathlib import Path
def merge_csv_files(input_dir, output_file):
csv_files = list(Path(input_dir).glob("*.csv"))
if not csv_files:
return
all_data = []
fieldnames = None
for csv_file in csv_files:
with open(csv_file, "r", encoding="utf-8") as f:
reader = csv.DictReader(f)
if fieldnames is None:
fieldnames = reader.fieldnames
all_data.extend(list(reader))
with open(output_file, "w", encoding="utf-8", newline="") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(all_data)
merge_csv_files("csv_folder", "merged.csv")
過濾 CSV 資料
import csv
def filter_csv(input_file, output_file, condition):
"""
condition: 接受一個 row dict,回傳 True/False
"""
with open(input_file, "r", encoding="utf-8") as f_in:
reader = csv.DictReader(f_in)
fieldnames = reader.fieldnames
with open(output_file, "w", encoding="utf-8", newline="") as f_out:
writer = csv.DictWriter(f_out, fieldnames=fieldnames)
writer.writeheader()
for row in reader:
if condition(row):
writer.writerow(row)
# 使用:過濾年齡大於 25 的資料
filter_csv(
"users.csv",
"filtered.csv",
lambda row: int(row.get("age", 0)) > 25
)
使用 pandas(推薦用於複雜操作)
對於複雜的 CSV 操作,pandas 函式庫更方便:
import pandas as pd
# 讀取 CSV
df = pd.read_csv("data.csv")
# 查看資料
print(df.head())
# 過濾
filtered = df[df["age"] > 25]
# 統計
print(df["age"].mean())
# 寫入 CSV
df.to_csv("output.csv", index=False)