# -*- coding: utf-8 -*-
"""
阶段 C：正式将 City 字段回填到 national_sites.db

功能：
    1. 给 NationalHistoricalandCulturalSites 表增加 City TEXT 字段（若不存在）
    2. 读取 City预测_全量.csv，按 Id 批量 UPDATE City
    3. 建立索引 idx_city（供按市筛选/聚合）
    4. 执行前自动备份数据库
    5. 执行后自检：City 非空率、按市聚合 Top 20

安全机制（两步确认）：
    - 默认只预览（不改库，不备份）
    - 加 --apply 参数并手动输入 'yes' 才真正执行

用法：
    预览：    python 落库City字段.py
    执行：    python 落库City字段.py --apply
"""

import csv
import os
import shutil
import sqlite3
import sys
from datetime import datetime

NS_DB = r'd:\BYP\Project\BYP\BYPsNotes\db\national_sites.db'
CSV_PATH = r'd:\BYP\Project\BYP\BYPsNotes\extern\attachments\topic\县以上行政区划代码\City预测_全量.csv'


def ensure_files():
    if not os.path.exists(NS_DB):
        print(f'ERROR: 数据库不存在：{NS_DB}')
        sys.exit(1)
    if not os.path.exists(CSV_PATH):
        print(f'ERROR: CSV 不存在：{CSV_PATH}')
        print('请先运行 "增强回填City.py" 生成预测结果')
        sys.exit(1)


def load_csv():
    """读入 CSV，返回 [(Id, City), ...]"""
    rows = []
    with open(CSV_PATH, 'r', encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f)
        for r in reader:
            try:
                rid = int(r['Id'])
            except (TypeError, ValueError):
                continue
            city = (r.get('City') or '').strip()
            rows.append((rid, city))
    return rows


def column_exists(cur, table, column):
    cur.execute(f'PRAGMA table_info({table})')
    return any(row[1] == column for row in cur.fetchall())


def index_exists(cur, idx_name):
    cur.execute("SELECT name FROM sqlite_master WHERE type='index' AND name = ?", (idx_name,))
    return cur.fetchone() is not None


def backup_db():
    """备份数据库，返回备份路径"""
    ts = datetime.now().strftime('%Y%m%d_%H%M%S')
    bak = f'{NS_DB}.bak.{ts}'
    shutil.copy2(NS_DB, bak)
    return bak


def preview():
    """预览模式：只读，不改库"""
    print('=' * 60)
    print('【预览模式】只读，不会修改数据库')
    print('=' * 60)
    rows = load_csv()
    empty_city = sum(1 for _, c in rows if not c)
    city_counter = {}
    for _, c in rows:
        if c:
            city_counter[c] = city_counter.get(c, 0) + 1

    print(f'\nCSV 来源：{CSV_PATH}')
    print(f'  总行数    ：{len(rows)}')
    print(f'  City 非空 ：{len(rows) - empty_city}')
    print(f'  City 为空 ：{empty_city}')

    print(f'\n目标数据库：{NS_DB}')
    conn = sqlite3.connect(NS_DB)
    cur = conn.cursor()
    has_col = column_exists(cur, 'NationalHistoricalandCulturalSites', 'City')
    has_idx = index_exists(cur, 'idx_city')
    cur.execute('SELECT COUNT(*) FROM NationalHistoricalandCulturalSites')
    db_total = cur.fetchone()[0]
    conn.close()
    print(f'  数据库当前记录数 ：{db_total}')
    print(f'  City 字段是否存在 ：{"是" if has_col else "否（将新增）"}')
    print(f'  idx_city 索引是否存在：{"是" if has_idx else "否（将新增）"}')

    print('\n按市聚合（Top 20）：')
    top = sorted(city_counter.items(), key=lambda x: -x[1])[:20]
    for name, cnt in top:
        print(f'  {name:<16} {cnt:>4}')

    print('\n执行计划：')
    print('  1. 备份数据库到 national_sites.db.bak.YYYYMMDD_HHMMSS')
    if not has_col:
        print('  2. ALTER TABLE ... ADD COLUMN City TEXT')
    else:
        print('  2. City 字段已存在，跳过建表')
    print(f'  3. UPDATE ... SET City = ? WHERE Id = ?  共 {len(rows)} 条')
    if not has_idx:
        print('  4. CREATE INDEX idx_city ON ... (City)')
    else:
        print('  4. idx_city 已存在，跳过建索引')
    print('  5. 自检：City 非空率、按市聚合 Top 20\n')
    print('要真正执行，请加 --apply 参数重新运行：')
    print('  python 落库City字段.py --apply')


def apply():
    """执行模式：改库"""
    print('=' * 60)
    print('【执行模式】将会修改数据库！')
    print('=' * 60)

    rows = load_csv()
    print(f'CSV 共 {len(rows)} 行')

    ans = input('\n确认执行？请输入 yes 继续：').strip()
    if ans.lower() != 'yes':
        print('已取消。')
        return

    # 1. 备份
    bak = backup_db()
    print(f'\n[1/5] 备份完成：{bak}')

    conn = sqlite3.connect(NS_DB)
    cur = conn.cursor()

    # 2. 加字段
    if not column_exists(cur, 'NationalHistoricalandCulturalSites', 'City'):
        cur.execute('ALTER TABLE NationalHistoricalandCulturalSites ADD COLUMN City TEXT')
        print('[2/5] 新增字段 City TEXT')
    else:
        print('[2/5] 字段 City 已存在，跳过')

    # 3. 批量 UPDATE
    cur.executemany(
        'UPDATE NationalHistoricalandCulturalSites SET City = ? WHERE Id = ?',
        [(city, rid) for rid, city in rows]
    )
    print(f'[3/5] UPDATE 完成，影响行数：{cur.rowcount}')

    # 4. 建索引
    if not index_exists(cur, 'idx_city'):
        cur.execute('CREATE INDEX idx_city ON NationalHistoricalandCulturalSites(City)')
        print('[4/5] 索引 idx_city 创建完成')
    else:
        print('[4/5] 索引 idx_city 已存在，跳过')

    conn.commit()

    # 5. 自检
    print('[5/5] 自检：')
    cur.execute('SELECT COUNT(*) FROM NationalHistoricalandCulturalSites')
    total = cur.fetchone()[0]
    cur.execute("SELECT COUNT(*) FROM NationalHistoricalandCulturalSites WHERE City IS NOT NULL AND City != ''")
    has_city = cur.fetchone()[0]
    print(f'  总数      ：{total}')
    print(f'  City 非空 ：{has_city}  ({has_city/total*100:.1f}%)')

    print('\n  按市聚合 Top 20：')
    cur.execute('''
        SELECT City, COUNT(*) AS cnt
        FROM NationalHistoricalandCulturalSites
        WHERE City IS NOT NULL AND City != ''
        GROUP BY City
        ORDER BY cnt DESC
        LIMIT 20
    ''')
    for name, cnt in cur.fetchall():
        print(f'    {name:<16} {cnt:>4}')

    conn.close()
    print('\n完成。若有异常，可用备份恢复：')
    print(f'  copy "{bak}" "{NS_DB}"')


def main():
    ensure_files()
    if '--apply' in sys.argv:
        apply()
    else:
        preview()


if __name__ == '__main__':
    main()
