#!/usr/bin/env python3 import os import re import sys from datetime import datetime from pytz import timezone, AmbiguousTimeError, NonExistentTimeError import psycopg drive_name_re = re.compile(r'attrlog\.(.*).ata.csv') conn = psycopg.connect("dbname=smartmontools") def create_tables(): with conn.cursor() as cur: cur.execute("START TRANSACTION") cur.execute(""" CREATE TABLE device ( id serial primary key, name text, tell bigint default 0 -- Total number of attrlog lines imported since last time )""") cur.execute(""" CREATE TABLE attrlog ( time timestamptz not null, device_id integer references device(id) not null, id smallint not null, norm smallint, raw bigint, unique (time, device_id, id) ) """) cur.execute("COMMIT") def create_or_find_device(name): with conn.cursor() as cur: cur.execute("SELECT id, tell FROM device WHERE name = %s", (name,)) row = cur.fetchone() if row is None: cur.execute("INSERT INTO device (name) VALUES (%s) RETURNING id, tell", (name,)) row = cur.fetchone() return row def set_tell(device_id, tell): with conn.cursor() as cur: cur.execute("UPDATE device SET tell = %s WHERE id = %s", (tell, device_id)) def parse_attrlog_file(filename, device_id=None, start_seek=0): tz_dst = timezone("Europe/Tallinn") utc = timezone("UTC") # This is a date when smartmontools switched from UTC time to local time # That change was done in commit b75b99551368da1a8623cd76b3c67bdd3aaceddc smartmontools_update_date = datetime(2021, 9, 16, 18, 00, 00) dst = None # Is daylight saving time in effect? prev_time = None fd = open(filename) file_size = fd.seek(0, os.SEEK_END) fd.seek(start_seek) while fd.tell() != file_size: line = fd.readline() line_parts = [p for p in line.strip().split(";") if p.strip()] plain_dt = datetime.strptime(line_parts.pop(0), "%Y-%m-%d %H:%M:%S") # Debian 10 to 11 upgrade added timezone and dst support to smartmontools timestamps if plain_dt > smartmontools_update_date: tz = tz_dst else: tz = utc try: dt = tz.localize(plain_dt, is_dst=None) # We are currently in normal time cur_dst = bool(dt.dst()) if dst != cur_dst: dst = cur_dst except (AmbiguousTimeError, NonExistentTimeError): # We are in Ambiguous time where localtime cant be translated to UTC # Hack around it by tracking previous DST and time values dt = tz.localize(plain_dt, is_dst=dst) if prev_time and prev_time > dt: dt = tz.localize(plain_dt, is_dst=not dst) prev_time = dt cur_dst = bool(dt.dst()) dtu = dt.astimezone(utc) #print(plain_dt, dt, dtu, dst, cur_dst, sep='; ') if dst is None: dst = cur_dst while line_parts: id = int(line_parts.pop(0)) norm = int(line_parts.pop(0)) raw = int(line_parts.pop(0)) yield str(dtu), id, norm, raw, device_id, fd.tell() # Pretty progress indicator if fd.tell() % 100 == 0: print(f"{int(((fd.tell() - start_seek) / (file_size - start_seek))*100):>5}%", end='\r') print() def import_attrlog_file(filename): drive_name = drive_name_re.search(filename).group(1) print(drive_name) device_id, tell = create_or_find_device(drive_name) with conn.cursor() as cur: cur.execute("START TRANSACTION") # for row in parse_attrlog_file(filename, device_id): # cur.execute(""" # INSERT INTO attrlog (time, id, norm, raw, device_id) # VALUES (%s, %s, %s, %s, %s) # """, row) with cur.copy("copy attrlog (time, id, norm, raw, device_id) FROM STDIN") as copy: for row in parse_attrlog_file(filename, device_id, tell): tell = row[-1] #print(row) copy.write_row(row[:-1]) set_tell(device_id, tell) #cur.execute("ROLLBACK") cur.execute("COMMIT") if __name__ == '__main__': if len(sys.argv) < 2: create_tables() else: import_attrlog_file(sys.argv[1])