#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
verify_month_csv.py
Audit a monthly CSV (OKX candles) for completeness and gaps.
"""

import argparse, datetime, calendar

def ms_to_dt(ms):
    """Convert a Unix timestamp in milliseconds to a UTC-aware datetime."""
    seconds = ms / 1000
    utc = datetime.timezone.utc
    return datetime.datetime.fromtimestamp(seconds, tz=utc)

def _read_timestamps(path):
    """Return the first-column integer timestamps from the CSV at *path*.

    Blank lines are ignored.  If the first non-blank line does not start
    with an integer it is treated as a header row and skipped; any later
    non-integer value raises ``ValueError`` naming the offending line.
    Values are returned in file order, duplicates included.
    """
    stamps = []
    seen_content = False
    # utf-8-sig decodes plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise make the first timestamp unparseable.
    with open(path, "r", encoding="utf-8-sig") as f:
        for lineno, raw in enumerate(f, 1):
            line = raw.strip()
            if not line:
                continue
            field = line.partition(",")[0]
            try:
                stamps.append(int(field))
            except ValueError:
                if seen_content:
                    raise ValueError(
                        "%s:%d: first column is not an integer timestamp: %r"
                        % (path, lineno, field)
                    ) from None
                # Otherwise this is the first non-blank line: a header row.
            seen_content = True
    return stamps


def _scan_gaps(ts_sorted, step_ms):
    """Return ``(gap_count, largest_gap_ms)`` for ascending unique timestamps.

    Every adjacent pair whose spacing differs from *step_ms* counts as one
    gap segment; ``largest_gap_ms`` is 0 when there are none.
    """
    gaps = 0
    max_gap = 0
    for earlier, later in zip(ts_sorted, ts_sorted[1:]):
        delta = later - earlier
        if delta != step_ms:
            gaps += 1
            max_gap = max(max_gap, delta)
    return gaps, max_gap


def main(argv=None):
    """Audit one month of candle timestamps and print a summary report.

    The first CSV column is read as a Unix timestamp in milliseconds.  The
    report lists raw/unique/expected row counts, the earliest and latest
    timestamp, the number and maximum size of spacing irregularities, and
    the first and last candle-open times the month should contain (UTC).

    Args:
        argv: Command-line arguments without the program name.  ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        0 after printing the report, or 2 if the file holds no timestamps.

    Raises:
        SystemExit: from argparse, for invalid or missing arguments.
        ValueError: if a data row's first column is not an integer.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--file", required=True)
    ap.add_argument("--year", type=int, required=True)
    ap.add_argument("--month", type=int, required=True)
    ap.add_argument("--step-minutes", type=int, default=5)
    args = ap.parse_args(argv)

    # Reject values that would otherwise surface as a ZeroDivisionError or a
    # calendar traceback further down.
    if args.step_minutes <= 0:
        ap.error("--step-minutes must be a positive integer")
    if not 1 <= args.month <= 12:
        ap.error("--month must be in the range 1..12")

    last_day = calendar.monthrange(args.year, args.month)[1]
    # Count over the whole month in minutes so that steps longer than an hour
    # (or not dividing 60) are handled too.
    expected = last_day * 24 * 60 // args.step_minutes

    ts = _read_timestamps(args.file)
    ts_sorted = sorted(set(ts))

    if not ts_sorted:
        print("EMPTY")
        return 2

    utc = datetime.timezone.utc
    print("rows_raw=%d rows_unique=%d expected=%d" % (len(ts), len(ts_sorted), expected))
    for label, value in (("min", ts_sorted[0]), ("max", ts_sorted[-1])):
        when = datetime.datetime.fromtimestamp(value / 1000, tz=utc)
        print("%s=%d (%s)" % (label, value, when.isoformat()))

    step_ms = args.step_minutes * 60 * 1000
    gaps, max_gap = _scan_gaps(ts_sorted, step_ms)
    print("gap_segments=%d max_gap_minutes=%.1f" % (gaps, max_gap / 60000.0 if max_gap else 0.0))

    # Exact month bounds: the first candle opens at midnight on day 1 and the
    # last one opens one step before the start of the following month.
    start_dt = datetime.datetime(args.year, args.month, 1, tzinfo=utc)
    end_dt = (
        start_dt
        + datetime.timedelta(days=last_day)
        - datetime.timedelta(minutes=args.step_minutes)
    )
    print("expected_start=%s" % start_dt.isoformat())
    print("expected_end=%s" % end_dt.isoformat())
    return 0

if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
