1
0

Support importing only new data from end

This commit is contained in:
Arti Zirk 2021-11-07 20:38:26 +02:00
parent b561370ee5
commit 3c516abd9a

View File

@ -18,7 +18,8 @@ def create_tables():
cur.execute(""" cur.execute("""
CREATE TABLE device ( CREATE TABLE device (
id serial primary key, id serial primary key,
name text name text,
tell bigint default 0 -- Total number of attrlog lines imported since last time
)""") )""")
cur.execute(""" cur.execute("""
CREATE TABLE attrlog ( CREATE TABLE attrlog (
@ -33,17 +34,22 @@ def create_tables():
cur.execute("COMMIT") cur.execute("COMMIT")
def create_or_find_device_id(name): def create_or_find_device(name):
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute("SELECT id FROM device WHERE name = %s", (name,)) cur.execute("SELECT id, tell FROM device WHERE name = %s", (name,))
row = cur.fetchone() row = cur.fetchone()
if row is None: if row is None:
cur.execute("INSERT INTO device (name) VALUES (%s) RETURNING id", (name,)) cur.execute("INSERT INTO device (name) VALUES (%s) RETURNING id, tell", (name,))
row = cur.fetchone() row = cur.fetchone()
return row[0] return row
def parse_attrlog_file(filename, device_id=None): def set_tell(device_id, tell):
with conn.cursor() as cur:
cur.execute("UPDATE device SET tell = %s WHERE id = %s", (tell, device_id))
def parse_attrlog_file(filename, device_id=None, start_seek=0):
tz_dst = timezone("Europe/Tallinn") tz_dst = timezone("Europe/Tallinn")
utc = timezone("UTC") utc = timezone("UTC")
# This is a date when smartmontools switched from UTC time to local time # This is a date when smartmontools switched from UTC time to local time
@ -54,7 +60,7 @@ def parse_attrlog_file(filename, device_id=None):
fd = open(filename) fd = open(filename)
file_size = fd.seek(0, os.SEEK_END) file_size = fd.seek(0, os.SEEK_END)
fd.seek(0) fd.seek(start_seek)
while fd.tell() != file_size: while fd.tell() != file_size:
line = fd.readline() line = fd.readline()
@ -94,18 +100,17 @@ def parse_attrlog_file(filename, device_id=None):
id = int(line_parts.pop(0)) id = int(line_parts.pop(0))
norm = int(line_parts.pop(0)) norm = int(line_parts.pop(0))
raw = int(line_parts.pop(0)) raw = int(line_parts.pop(0))
yield str(dtu), id, norm, raw, device_id yield str(dtu), id, norm, raw, device_id, fd.tell()
# Pretty progress indicator # Pretty progress indicator
if fd.tell() % 100 == 0: if fd.tell() % 100 == 0:
print(f"{int((fd.tell() / file_size)*100):>5}%", end='\r') print(f"{int(((fd.tell() - start_seek) / (file_size - start_seek))*100):>5}%", end='\r')
print() print()
def import_attrlog_file(filename): def import_attrlog_file(filename):
drive_name = drive_name_re.search(filename).group(1) drive_name = drive_name_re.search(filename).group(1)
print(drive_name) print(drive_name)
device_id = create_or_find_device_id(drive_name) device_id, tell = create_or_find_device(drive_name)
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute("START TRANSACTION") cur.execute("START TRANSACTION")
# for row in parse_attrlog_file(filename, device_id): # for row in parse_attrlog_file(filename, device_id):
@ -114,9 +119,11 @@ def import_attrlog_file(filename):
# VALUES (%s, %s, %s, %s, %s) # VALUES (%s, %s, %s, %s, %s)
# """, row) # """, row)
with cur.copy("copy attrlog (time, id, norm, raw, device_id) FROM STDIN") as copy: with cur.copy("copy attrlog (time, id, norm, raw, device_id) FROM STDIN") as copy:
for row in parse_attrlog_file(filename, device_id): for row in parse_attrlog_file(filename, device_id, tell):
tell = row[-1]
#print(row) #print(row)
copy.write_row(row) copy.write_row(row[:-1])
set_tell(device_id, tell)
#cur.execute("ROLLBACK") #cur.execute("ROLLBACK")
cur.execute("COMMIT") cur.execute("COMMIT")