diff --git a/attrlogimport.py b/attrlogimport.py index 4f35401..95b2455 100755 --- a/attrlogimport.py +++ b/attrlogimport.py @@ -3,8 +3,7 @@ import os import re import sys from datetime import datetime -from zoneinfo import ZoneInfo -from pytz import timezone +from pytz import timezone, AmbiguousTimeError, NonExistentTimeError import psycopg @@ -45,24 +44,59 @@ def create_or_find_device_id(name): def parse_attrlog_file(filename, device_id=None): - tz = timezone("Europe/Tallinn") + tz_dst = timezone("Europe/Tallinn") utc = timezone("UTC") + # This is the date when smartmontools switched from UTC time to local time + # That change was done in commit b75b99551368da1a8623cd76b3c67bdd3aaceddc + smartmontools_update_date = datetime(2021, 9, 16, 18, 00, 00) + dst = None # Is daylight saving time in effect? + prev_time = None + fd = open(filename) file_size = fd.seek(0, os.SEEK_END) fd.seek(0) + while fd.tell() != file_size: line = fd.readline() line_parts = [p for p in line.strip().split(";") if p.strip()] - #dt = line_parts.pop(0)+"+0200" - dt = datetime.strptime(line_parts.pop(0), "%Y-%m-%d %H:%M:%S")#.replace(tzinfo=tz) - dtz = dt.replace(tzinfo=tz) + plain_dt = datetime.strptime(line_parts.pop(0), "%Y-%m-%d %H:%M:%S") + + # The Debian 10 to 11 upgrade added timezone and DST support to smartmontools timestamps + if plain_dt > smartmontools_update_date: + tz = tz_dst + else: + tz = utc + + try: + dt = tz.localize(plain_dt, is_dst=None) + + # We are in unambiguous time; remember whether DST is in effect + cur_dst = bool(dt.dst()) + if dst != cur_dst: + dst = cur_dst + except (AmbiguousTimeError, NonExistentTimeError): + # We are in ambiguous (or non-existent) time where local time can't be translated to UTC + # Hack around it by tracking previous DST and time values + dt = tz.localize(plain_dt, is_dst=dst) + if prev_time and prev_time > dt: + dt = tz.localize(plain_dt, is_dst=not dst) + + prev_time = dt + cur_dst = bool(dt.dst()) + dtu = dt.astimezone(utc) - print(dt, dtz, dtu, sep='; ') + #print(plain_dt, dt, dtu, dst, 
cur_dst, sep='; ') + + if dst is None: + dst = cur_dst + while line_parts: id = int(line_parts.pop(0)) norm = int(line_parts.pop(0)) raw = int(line_parts.pop(0)) - yield dtu, id, norm, raw, device_id + yield str(dtu), id, norm, raw, device_id + + # Pretty progress indicator if fd.tell() % 100 == 0: print(f"{int((fd.tell() / file_size)*100):>5}%", end='\r') print() @@ -74,15 +108,16 @@ def import_attrlog_file(filename): device_id = create_or_find_device_id(drive_name) with conn.cursor() as cur: cur.execute("START TRANSACTION") - for row in parse_attrlog_file(filename, device_id): - cur.execute(""" - INSERT INTO attrlog (time, id, norm, raw, device_id) - VALUES (%s, %s, %s, %s, %s) - """, row) - # with cur.copy("copy attrlog (time, id, norm, raw, device_id) FROM STDIN") as copy: - # for row in parse_attrlog_file(filename, device_id): - # #print(row) - # copy.write_row(row) + # for row in parse_attrlog_file(filename, device_id): + # cur.execute(""" + # INSERT INTO attrlog (time, id, norm, raw, device_id) + # VALUES (%s, %s, %s, %s, %s) + # """, row) + with cur.copy("copy attrlog (time, id, norm, raw, device_id) FROM STDIN") as copy: + for row in parse_attrlog_file(filename, device_id): + #print(row) + copy.write_row(row) + #cur.execute("ROLLBACK") cur.execute("COMMIT") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e7962f1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +psycopg==3.0.1 +psycopg-binary==3.0.1 +pytz==2021.3