Python JSON

JSON (JavaScript Object Notation) 是一種輕量級的資料交換格式。Python 的 json 模組提供了處理 JSON 的功能。

JSON 格式

JSON 支援以下資料類型:

  • 字串:"hello"
  • 數字:123、3.14
  • 布林:true、false
  • 空值:null
  • 陣列:[1, 2, 3]
  • 物件:{"key": "value"}
{
    "name": "Alice",
    "age": 25,
    "is_student": false,
    "courses": ["Python", "JavaScript"],
    "address": {
        "city": "Taipei",
        "country": "Taiwan"
    }
}

Python 和 JSON 的對應

Python | JSON
dict | object
list, tuple | array
str | string
int, float | number
True | true
False | false
None | null

基本用法

import json

dumps() - Python 轉 JSON 字串

data = {
    "name": "Alice",
    "age": 25,
    "courses": ["Python", "JavaScript"]
}

json_str = json.dumps(data)
print(json_str)
# {"name": "Alice", "age": 25, "courses": ["Python", "JavaScript"]}

loads() - JSON 字串轉 Python

json_str = '{"name": "Alice", "age": 25}'
data = json.loads(json_str)

print(data["name"])  # Alice
print(type(data))    # <class 'dict'>

dump() - 寫入 JSON 檔案

data = {"name": "Alice", "age": 25}

with open("data.json", "w", encoding="utf-8") as f:
    json.dump(data, f)

load() - 讀取 JSON 檔案

with open("data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

print(data)

格式化輸出

indent - 縮排

data = {"name": "Alice", "age": 25, "courses": ["Python", "JavaScript"]}

# 美化輸出
print(json.dumps(data, indent=2))

輸出:

{
  "name": "Alice",
  "age": 25,
  "courses": [
    "Python",
    "JavaScript"
  ]
}

sort_keys - 排序 key

data = {"c": 3, "a": 1, "b": 2}

print(json.dumps(data, sort_keys=True))
# {"a": 1, "b": 2, "c": 3}

ensure_ascii - 處理非 ASCII 字元

data = {"name": "小明", "city": "台北"}

# 預設會將非 ASCII 字元轉為 \uXXXX
print(json.dumps(data))
# {"name": "\u5c0f\u660e", "city": "\u53f0\u5317"}

# 保留原本的字元
print(json.dumps(data, ensure_ascii=False))
# {"name": "小明", "city": "台北"}

處理特殊類型

JSON 不支援所有 Python 類型,需要自訂轉換。

處理日期時間

from datetime import datetime

data = {
    "name": "Event",
    "date": datetime.now()
}

# 直接轉換會報錯
# json.dumps(data)  # TypeError

# 方法 1:轉換為字串
data["date"] = data["date"].isoformat()
print(json.dumps(data))

# 方法 2:使用 default 參數
def json_encoder(obj):
    """Serialize objects that ``json.dumps`` cannot handle natively.

    Intended to be passed as the ``default=`` argument of ``json.dumps``,
    which calls it for each object it does not know how to encode.

    Args:
        obj: The object the encoder could not serialize.

    Returns:
        The ISO 8601 string produced by ``obj.isoformat()`` when ``obj`` is
        a ``datetime``.

    Raises:
        TypeError: If ``obj`` is not a ``datetime``.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Use the bare class name so the message reads "Object of type set ..."
    # (the wording the json module itself uses) instead of embedding the
    # repr "<class 'set'>".
    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )

data = {"name": "Event", "date": datetime.now()}
print(json.dumps(data, default=json_encoder))

自訂 JSONEncoder

from datetime import datetime, date

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that also understands dates and sets.

    Pass it as ``cls=CustomEncoder`` to ``json.dumps`` or ``json.dump``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Args:
            obj: An object the base encoder could not serialize.

        Returns:
            An ISO 8601 string for ``datetime``/``date`` values, or a list
            for ``set``/``frozenset`` values (element order follows the
            set's iteration order, which is not guaranteed).

        Raises:
            TypeError: Raised by the base class for any other type.
        """
        # datetime is a subclass of date; both expose isoformat().
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        # frozenset is handled alongside set: neither has a JSON equivalent.
        if isinstance(obj, (set, frozenset)):
            return list(obj)
        return super().default(obj)

data = {
    "date": datetime.now(),
    "tags": {"python", "json", "tutorial"}
}

print(json.dumps(data, cls=CustomEncoder, indent=2))

處理自訂類別

class User:
    """Minimal user record used by the serialization examples."""

    def __init__(self, name, age):
        """Store ``name`` and ``age`` as plain instance attributes."""
        # Assigned left to right, so __dict__ lists "name" before "age".
        self.name, self.age = name, age

# 方法 1:使用 __dict__
user = User("Alice", 25)
print(json.dumps(user.__dict__))

# 方法 2:自訂 encoder
def user_encoder(obj):
    """Serialize ``User`` instances for ``json.dumps(default=...)``.

    Args:
        obj: The object the encoder could not serialize.

    Returns:
        A dict with the user's ``name`` and ``age``.

    Raises:
        TypeError: If ``obj`` is not a ``User``.
    """
    if isinstance(obj, User):
        return {"name": obj.name, "age": obj.age}
    # Use the bare class name so the message reads "Object of type set ..."
    # (the wording the json module itself uses) instead of embedding the
    # repr "<class 'set'>".
    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )

print(json.dumps(user, default=user_encoder))

# 方法 3:在類別中定義 to_dict 方法
class User:
    """User record that can export itself as a plain dict."""

    def __init__(self, name, age):
        """Store ``name`` and ``age`` as plain instance attributes."""
        self.name, self.age = name, age

    def to_dict(self):
        """Return the user's name and age as a dict ready for json.dumps."""
        return dict(name=self.name, age=self.age)

user = User("Alice", 25)
print(json.dumps(user.to_dict()))

JSON 字串解析

解析巢狀結構

json_str = '''
{
    "users": [
        {"name": "Alice", "age": 25},
        {"name": "Bob", "age": 30}
    ],
    "total": 2
}
'''

data = json.loads(json_str)

print(data["total"])  # 2
print(data["users"][0]["name"])  # Alice

for user in data["users"]:
    print(f"{user['name']}: {user['age']} years old")

object_hook - 自訂解析

json_str = '{"name": "Alice", "birth_date": "1999-01-15"}'

def custom_decoder(obj):
    """Convert a ``birth_date`` string into a ``datetime`` while decoding.

    Meant to be used as the ``object_hook`` of ``json.loads``. When the
    decoded object has a ``birth_date`` key, its value is parsed with the
    ``%Y-%m-%d`` format and replaced in place; objects without that key are
    returned untouched.

    Args:
        obj: A decoded JSON object (dict).

    Returns:
        The same ``obj`` that was passed in.
    """
    key = "birth_date"
    if key not in obj:
        return obj
    obj[key] = datetime.strptime(obj[key], "%Y-%m-%d")
    return obj

data = json.loads(json_str, object_hook=custom_decoder)
print(data["birth_date"])  # 1999-01-15 00:00:00
print(type(data["birth_date"]))  # <class 'datetime.datetime'>

實際範例

設定檔處理

import json
from pathlib import Path

class Config:
    """Small JSON-backed key/value settings store.

    The file is read once on construction into ``self.data``; every ``set``
    call writes the complete mapping back to disk.
    """

    def __init__(self, config_path="config.json"):
        """Load settings from ``config_path`` (a string or path object)."""
        self.config_path = Path(config_path)
        self.data = self._load()

    def _load(self):
        """Return the parsed file contents, or ``{}`` if the file is absent.

        Raises:
            json.JSONDecodeError: If the file exists but is not valid JSON.
        """
        # Open directly instead of checking exists() first, so the file
        # cannot vanish between the check and the open.
        try:
            with open(self.config_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (FileNotFoundError, NotADirectoryError):
            return {}

    def save(self):
        """Write ``self.data`` to ``config_path`` as indented UTF-8 JSON."""
        # Create missing parent directories so a fresh nested path works.
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.config_path, "w", encoding="utf-8") as f:
            json.dump(self.data, f, indent=2, ensure_ascii=False)

    def get(self, key, default=None):
        """Return the value stored under ``key``, or ``default`` if unset."""
        return self.data.get(key, default)

    def set(self, key, value):
        """Store ``value`` under ``key`` and persist immediately."""
        self.data[key] = value
        self.save()

# 使用
config = Config()
config.set("debug", True)
config.set("database", {"host": "localhost", "port": 5432})
print(config.get("debug"))

API 回應處理

import json

def parse_api_response(response_text):
    """Decode an API response body and return its ``data`` payload.

    Args:
        response_text: The raw JSON text of the response.

    Returns:
        The value under the ``data`` key, or ``None`` if it is absent.

    Raises:
        ValueError: If the text is not valid JSON (chained from the original
            ``json.JSONDecodeError``) or its root is not a JSON object.
        RuntimeError: If the response has a non-null ``error`` entry. The
            exception carries ``error["message"]`` when ``error`` is an
            object with that key, otherwise the ``error`` value itself.
    """
    # Keep the try body to the one call that can raise JSONDecodeError.
    try:
        payload = json.loads(response_text)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON: {e}") from e

    # The key lookups below only make sense on an object; fail clearly
    # instead of leaking an AttributeError from a list or scalar root.
    if not isinstance(payload, dict):
        raise ValueError(
            f"Unexpected response type: {type(payload).__name__}"
        )

    error = payload.get("error")
    if error is not None:
        # Accept both {"error": {"message": "..."}} and {"error": "..."}.
        if isinstance(error, dict):
            raise RuntimeError(error.get("message", error))
        raise RuntimeError(error)

    return payload.get("data")

# 模擬 API 回應
response = '{"status": "success", "data": {"users": [{"id": 1, "name": "Alice"}]}}'
result = parse_api_response(response)
print(result)

資料驗證

import json

def validate_json(json_str, required_fields):
    """Check that ``json_str`` is a JSON object containing the given keys.

    Args:
        json_str: JSON text to validate.
        required_fields: Iterable of key names that must be present.

    Returns:
        ``(True, data)`` with the parsed dict on success, otherwise
        ``(False, message)`` describing why validation failed.
    """
    try:
        data = json.loads(json_str)
    except json.JSONDecodeError:
        return False, "Invalid JSON format"

    # Key lookup only makes sense on an object. Without this check a JSON
    # string would be substring-matched and a JSON number would raise
    # TypeError on the membership test below.
    if not isinstance(data, dict):
        return False, "JSON root must be an object"

    missing = [field for field in required_fields if field not in data]
    if missing:
        return False, f"Missing fields: {missing}"

    return True, data

# 使用
json_str = '{"name": "Alice", "email": "alice@example.com"}'
is_valid, result = validate_json(json_str, ["name", "email", "age"])

if is_valid:
    print("Valid:", result)
else:
    print("Invalid:", result)  # Invalid: Missing fields: ['age']

處理大型 JSON 檔案

對於大型 JSON 檔案,可以使用串流處理:

import json

# 逐行讀取 JSONL 格式(每行一個 JSON 物件)
def read_jsonl(filename):
    """Lazily yield one decoded object per line of a JSON Lines file.

    The file is read as UTF-8 and decoded line by line, so only one record
    is held in memory at a time.

    Args:
        filename: Path of the JSONL file to read.

    Yields:
        The Python object decoded from each non-blank line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (such as a trailing empty line) are not records;
            # json.loads would raise on them, so skip them.
            if not line.strip():
                continue
            yield json.loads(line)

# 寫入 JSONL 格式
def write_jsonl(filename, data_list):
    """Serialize each item of ``data_list`` onto its own line of a file.

    The file is opened in write mode as UTF-8, replacing existing content.
    Non-ASCII characters are written as-is rather than escaped.

    Args:
        filename: Path of the JSONL file to write.
        data_list: Iterable of JSON-serializable objects, one per line.
    """
    with open(filename, "w", encoding="utf-8") as out:
        out.writelines(
            json.dumps(record, ensure_ascii=False) + "\n"
            for record in data_list
        )