Fork 0
Load smartmontools attrlog files to PostgreSQL
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

135 lines
4.4 KiB

#!/usr/bin/env python3
import os
import re
import sys
from datetime import datetime
from pytz import timezone, AmbiguousTimeError, NonExistentTimeError
import psycopg
drive_name_re = re.compile(r'attrlog\.(.*).ata.csv')
conn = psycopg.connect("dbname=smartmontools")
def create_tables():
with conn.cursor() as cur:
cur.execute("START TRANSACTION")
id serial primary key,
name text,
tell bigint default 0 -- Total number of attrlog lines imported since last time
CREATE TABLE attrlog (
time timestamptz not null,
device_id integer references device(id) not null,
id smallint not null,
norm smallint,
raw bigint,
unique (time, device_id, id)
def create_or_find_device(name):
with conn.cursor() as cur:
cur.execute("SELECT id, tell FROM device WHERE name = %s", (name,))
row = cur.fetchone()
if row is None:
cur.execute("INSERT INTO device (name) VALUES (%s) RETURNING id, tell", (name,))
row = cur.fetchone()
return row
def set_tell(device_id, tell):
with conn.cursor() as cur:
cur.execute("UPDATE device SET tell = %s WHERE id = %s", (tell, device_id))
def parse_attrlog_file(filename, device_id=None, start_seek=0):
tz_dst = timezone("Europe/Tallinn")
utc = timezone("UTC")
# This is a date when smartmontools switched from UTC time to local time
# That change was done in commit b75b99551368da1a8623cd76b3c67bdd3aaceddc
smartmontools_update_date = datetime(2021, 9, 16, 18, 00, 00)
dst = None # Is daylight saving time in effect?
prev_time = None
fd = open(filename)
file_size = fd.seek(0, os.SEEK_END)
while fd.tell() != file_size:
line = fd.readline()
line_parts = [p for p in line.strip().split(";") if p.strip()]
plain_dt = datetime.strptime(line_parts.pop(0), "%Y-%m-%d %H:%M:%S")
# Debian 10 to 11 upgrade added timezone and dst support to smartmontools timestamps
if plain_dt > smartmontools_update_date:
tz = tz_dst
tz = utc
dt = tz.localize(plain_dt, is_dst=None)
# We are currently in normal time
cur_dst = bool(dt.dst())
if dst != cur_dst:
dst = cur_dst
except (AmbiguousTimeError, NonExistentTimeError):
# We are in Ambiguous time where localtime cant be translated to UTC
# Hack around it by tracking previous DST and time values
dt = tz.localize(plain_dt, is_dst=dst)
if prev_time and prev_time > dt:
dt = tz.localize(plain_dt, is_dst=not dst)
prev_time = dt
cur_dst = bool(dt.dst())
dtu = dt.astimezone(utc)
#print(plain_dt, dt, dtu, dst, cur_dst, sep='; ')
if dst is None:
dst = cur_dst
while line_parts:
id = int(line_parts.pop(0))
norm = int(line_parts.pop(0))
raw = int(line_parts.pop(0))
yield str(dtu), id, norm, raw, device_id, fd.tell()
# Pretty progress indicator
if fd.tell() % 100 == 0:
print(f"{int(((fd.tell() - start_seek) / (file_size - start_seek))*100):>5}%", end='\r')
def import_attrlog_file(filename):
drive_name = drive_name_re.search(filename).group(1)
device_id, tell = create_or_find_device(drive_name)
with conn.cursor() as cur:
cur.execute("START TRANSACTION")
# for row in parse_attrlog_file(filename, device_id):
# cur.execute("""
# INSERT INTO attrlog (time, id, norm, raw, device_id)
# VALUES (%s, %s, %s, %s, %s)
# """, row)
with cur.copy("copy attrlog (time, id, norm, raw, device_id) FROM STDIN") as copy:
for row in parse_attrlog_file(filename, device_id, tell):
tell = row[-1]
set_tell(device_id, tell)
if __name__ == '__main__':
if len(sys.argv) < 2: