1
0
Fork 0

Handle timezones correctly

This commit is contained in:
Arti Zirk 2021-11-06 23:51:58 +02:00
parent 7b2f6e5b0e
commit b561370ee5
2 changed files with 55 additions and 17 deletions

View File

@ -3,8 +3,7 @@ import os
import re import re
import sys import sys
from datetime import datetime from datetime import datetime
from zoneinfo import ZoneInfo from pytz import timezone, AmbiguousTimeError, NonExistentTimeError
from pytz import timezone
import psycopg import psycopg
@ -45,24 +44,59 @@ def create_or_find_device_id(name):
def parse_attrlog_file(filename, device_id=None):
    """Parse an attribute log file and yield one row per logged attribute.

    Every non-blank line is expected to have the form
    ``YYYY-MM-DD HH:MM:SS;<id>;<norm>;<raw>;<id>;<norm>;<raw>;...``.
    Timestamps up to and including ``smartmontools_update_date`` are read as
    UTC; later ones are read as Europe/Tallinn local time. All timestamps are
    converted to UTC before being yielded.

    Args:
        filename: Path of the log file to read.
        device_id: Value passed through unchanged as the last element of
            every yielded row.

    Yields:
        Tuples ``(time, id, norm, raw, device_id)`` where ``time`` is the
        UTC timestamp rendered with ``str()`` and ``id``/``norm``/``raw``
        are ints.

    Raises:
        ValueError: If a timestamp or an attribute value cannot be parsed.
        IndexError: If a line's attribute values do not come in triples.
    """
    local_tz = timezone("Europe/Tallinn")
    utc = timezone("UTC")

    # This is the date when smartmontools switched from UTC time to local time.
    # That change was done in commit b75b99551368da1a8623cd76b3c67bdd3aaceddc
    smartmontools_update_date = datetime(2021, 9, 16, 18, 0, 0)

    dst = None  # Was daylight saving time in effect at the last known point?
    prev_time = None

    # The context manager closes the file once the generator is exhausted
    # or closed early by the consumer.
    with open(filename) as fd:
        file_size = fd.seek(0, os.SEEK_END)
        fd.seek(0)
        # readline() + tell() (instead of iterating the file object) keeps
        # tell() usable for the progress indicator below.
        while fd.tell() != file_size:
            line = fd.readline()
            if not line:
                # EOF reached even though tell() never matched the size
                # recorded up front; stop instead of spinning forever.
                break

            line_parts = [p for p in line.strip().split(";") if p.strip()]
            if not line_parts:
                # Blank line: nothing to parse.
                continue

            plain_dt = datetime.strptime(line_parts.pop(0), "%Y-%m-%d %H:%M:%S")

            # Debian 10 to 11 upgrade added timezone and DST support to
            # smartmontools timestamps.
            tz = local_tz if plain_dt > smartmontools_update_date else utc

            try:
                # is_dst=None makes pytz raise instead of guessing.
                dt = tz.localize(plain_dt, is_dst=None)
                dst = bool(dt.dst())
            except (AmbiguousTimeError, NonExistentTimeError):
                # The local time can't be mapped to UTC unambiguously.
                # Hack around it by tracking previous DST and time values.
                # If no DST state is known yet (first line of the file),
                # start from standard time; passing None here would only
                # raise the same exception again.
                dst_guess = bool(dst)
                dt = tz.localize(plain_dt, is_dst=dst_guess)
                if prev_time and prev_time > dt:
                    # Time appears to run backwards, so the other
                    # interpretation must be the right one.
                    dt = tz.localize(plain_dt, is_dst=not dst_guess)

            prev_time = dt
            if dst is None:
                dst = bool(dt.dst())
            dtu = dt.astimezone(utc)

            # The remaining fields are (id, normalized value, raw value) triples.
            while line_parts:
                attr_id = int(line_parts.pop(0))
                norm = int(line_parts.pop(0))
                raw = int(line_parts.pop(0))
                yield str(dtu), attr_id, norm, raw, device_id

            # Pretty progress indicator
            if fd.tell() % 100 == 0:
                print(f"{int((fd.tell() / file_size)*100):>5}%", end='\r')
    print()
@ -74,15 +108,16 @@ def import_attrlog_file(filename):
device_id = create_or_find_device_id(drive_name) device_id = create_or_find_device_id(drive_name)
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute("START TRANSACTION") cur.execute("START TRANSACTION")
for row in parse_attrlog_file(filename, device_id): # for row in parse_attrlog_file(filename, device_id):
cur.execute(""" # cur.execute("""
INSERT INTO attrlog (time, id, norm, raw, device_id) # INSERT INTO attrlog (time, id, norm, raw, device_id)
VALUES (%s, %s, %s, %s, %s) # VALUES (%s, %s, %s, %s, %s)
""", row) # """, row)
# with cur.copy("copy attrlog (time, id, norm, raw, device_id) FROM STDIN") as copy: with cur.copy("copy attrlog (time, id, norm, raw, device_id) FROM STDIN") as copy:
# for row in parse_attrlog_file(filename, device_id): for row in parse_attrlog_file(filename, device_id):
# #print(row) #print(row)
# copy.write_row(row) copy.write_row(row)
#cur.execute("ROLLBACK")
cur.execute("COMMIT") cur.execute("COMMIT")

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
psycopg==3.0.1
psycopg-binary==3.0.1
pytz==2021.3