1
0

Support importing only new data from end

This commit is contained in:
Arti Zirk 2021-11-07 20:38:26 +02:00
parent b561370ee5
commit 3c516abd9a

View File

@ -18,7 +18,8 @@ def create_tables():
cur.execute("""
CREATE TABLE device (
id serial primary key,
name text
name text,
tell bigint default 0 -- File offset (fd.tell()) up to which the attrlog has already been imported
)""")
cur.execute("""
CREATE TABLE attrlog (
@ -33,17 +34,22 @@ def create_tables():
cur.execute("COMMIT")
# Look up a device row by name, inserting it when it does not exist yet.
# Both the previous form (create_or_find_device_id, returning only the id via
# row[0]) and the renamed form (create_or_find_device, returning the whole
# (id, tell) row) are shown together in this diff view.
def create_or_find_device_id(name):
def create_or_find_device(name):
with conn.cursor() as cur:
# First try to find an existing device with this name.
cur.execute("SELECT id FROM device WHERE name = %s", (name,))
cur.execute("SELECT id, tell FROM device WHERE name = %s", (name,))
row = cur.fetchone()
if row is None:
# No such device yet: insert it and read back the values via RETURNING.
cur.execute("INSERT INTO device (name) VALUES (%s) RETURNING id", (name,))
cur.execute("INSERT INTO device (name) VALUES (%s) RETURNING id, tell", (name,))
row = cur.fetchone()
return row[0]
return row
def parse_attrlog_file(filename, device_id=None):
def set_tell(device_id, tell):
    """Record how far into its attrlog file a device has been imported.

    Args:
        device_id: ``device.id`` of the row to update.
        tell: Value to store in that row's ``tell`` column.

    Only the UPDATE statement is issued here; this function does not send
    a COMMIT itself.
    """
    update_sql = "UPDATE device SET tell = %s WHERE id = %s"
    params = (tell, device_id)
    with conn.cursor() as cursor:
        cursor.execute(update_sql, params)
def parse_attrlog_file(filename, device_id=None, start_seek=0):
tz_dst = timezone("Europe/Tallinn")
utc = timezone("UTC")
# This is the date when smartmontools switched from UTC time to local time
@ -54,7 +60,7 @@ def parse_attrlog_file(filename, device_id=None):
fd = open(filename)
file_size = fd.seek(0, os.SEEK_END)
fd.seek(0)
fd.seek(start_seek)
while fd.tell() != file_size:
line = fd.readline()
@ -94,18 +100,17 @@ def parse_attrlog_file(filename, device_id=None):
id = int(line_parts.pop(0))
norm = int(line_parts.pop(0))
raw = int(line_parts.pop(0))
yield str(dtu), id, norm, raw, device_id
yield str(dtu), id, norm, raw, device_id, fd.tell()
# Pretty progress indicator
if fd.tell() % 100 == 0:
print(f"{int((fd.tell() / file_size)*100):>5}%", end='\r')
print(f"{int(((fd.tell() - start_seek) / (file_size - start_seek))*100):>5}%", end='\r')
print()
def import_attrlog_file(filename):
drive_name = drive_name_re.search(filename).group(1)
print(drive_name)
device_id = create_or_find_device_id(drive_name)
device_id, tell = create_or_find_device(drive_name)
with conn.cursor() as cur:
cur.execute("START TRANSACTION")
# for row in parse_attrlog_file(filename, device_id):
@ -114,9 +119,11 @@ def import_attrlog_file(filename):
# VALUES (%s, %s, %s, %s, %s)
# """, row)
with cur.copy("copy attrlog (time, id, norm, raw, device_id) FROM STDIN") as copy:
for row in parse_attrlog_file(filename, device_id):
for row in parse_attrlog_file(filename, device_id, tell):
tell = row[-1]
#print(row)
copy.write_row(row)
copy.write_row(row[:-1])
set_tell(device_id, tell)
#cur.execute("ROLLBACK")
cur.execute("COMMIT")