From 07ea359c538f117bc33631e1d6eedf47bfa2fcc9 Mon Sep 17 00:00:00 2001 From: Fisch Date: Wed, 12 Jul 2023 20:17:27 +0200 Subject: [PATCH] add logfile concatenation and timestamp column --- logdata_visualization/logfix.py | 135 +++++++++++++++++++++++--------- 1 file changed, 100 insertions(+), 35 deletions(-) diff --git a/logdata_visualization/logfix.py b/logdata_visualization/logfix.py index a603252..745f559 100644 --- a/logdata_visualization/logfix.py +++ b/logdata_visualization/logfix.py @@ -4,42 +4,114 @@ import time import argparse parser = argparse.ArgumentParser(description='Copys, renames and fixes logfiles written by bobbycar sd logger.') -parser.add_argument('input', type=argparse.FileType('r')) -parser.add_argument('output', nargs='?', type=argparse.FileType('w')) +parser.add_argument('--input', type=argparse.FileType('r'), nargs='+') #one or more logfiles, concatenated in the given order. NOTE(review): not marked required, so args.input is None when the flag is omitted +parser.add_argument('--output', nargs='?', type=argparse.FileType('w')) args = parser.parse_args() ok=True -inputFilename=args.input.name +def getTimestamp(plines): #return the integer following the first 'TIMESTAMP:' marker found in plines + timestampline=-1 + timestampfound=False + while not timestampfound: + timestampline+=1 + timestampfound = (plines[timestampline].find('TIMESTAMP:')!=-1) #NOTE(review): raises IndexError when no line contains the marker + + timestamp=int(plines[timestampline].split('TIMESTAMP:')[1]) #timestamp when file was created + + if (timestampline==-1): #NOTE(review): cannot be true here, the loop above increments timestampline at least once + print("Error: Timestamp not found!") + exit() + + return timestamp + + +def filterLines(plines,plinesStarttime=None): #strip newlines, separate '#' comment lines from data lines and sort data lines by whether their comma-separated field count matches the header; plinesStarttime (optional array, one entry per line) is filtered the same way as pdatalinesOK + + plines = [x.rstrip("\n") for x in plines] #remove \n + pcommentlinesMask = [True if x.startswith('#') else False for x in plines] #generate mask for lines with comments + + + + + plines=np.array(plines) + pcommentlinesMask=np.array(pcommentlinesMask) + + if (plinesStarttime is not None): #if starttimelist given, first drop the entries belonging to comment lines + plinesStarttime = plinesStarttime[pcommentlinesMask==False] #get lines with data + + pdatalines = plines[pcommentlinesMask==False] #get lines with data + + + + pheader = pdatalines[0] #header is the first non comment line + 
pheaderSize = len(pheader.split(',')) #how many elements are expected per line + pdatalinesSize = [len(x.split(',')) for x in pdatalines] #count arraysize for every dataline + + if (plinesStarttime is not None): #if starttimelist given, match with pdatalinesOK + plinesStarttime=plinesStarttime[np.array(pdatalinesSize)==pheaderSize] #keep entries of lines with the expected field count + + pdatalinesOK = pdatalines[np.array(pdatalinesSize)==pheaderSize] #data lines with as many fields as the header (still includes header lines) + + if (plinesStarttime is not None): #if starttimelist given, match with pdatalinesOK + plinesStarttime = [plinesStarttime[i] for i,x in enumerate(pdatalinesOK) if x != pheader] #drop entries of header lines, result is a plain list + + pdatalinesOK = [x for x in pdatalinesOK if x != pheader] #exclude header from data lines + + + + + pdatalinesFail = pdatalines[np.array(pdatalinesSize)!=pheaderSize] #data lines with a wrong field count + + + plinesSize = [len(x.split(',')) for x in plines] #count arraysize for every dataline + plinesOK = np.array(plinesSize)==pheaderSize #mask for okay lines (valid for data lines) + + return plines,pheader,pcommentlinesMask,pdatalines,pdatalinesFail,pdatalinesOK,pheaderSize,plinesOK,plinesStarttime + + + +inputFilenames=[x.name for x in args.input] #only the names are used, the files are opened again below + outputFilename=None if args.output is not None: outputFilename=args.output.name -print("Input Filename: "+str(inputFilename)) - -with open(inputFilename, 'r') as reader: - lines = reader.readlines() -lines = [x.rstrip("\n") for x in lines] #remove \n -commentlinesMask = [True if x.startswith('#') else False for x in lines] #generate mask for lines with comments +lines=[] +linesStarttime=[] #offset for every line with timestamp. 
will be combined to new column +header="" +for inputFilename in inputFilenames: + print("Reading "+str(inputFilename)) + inputlines=[] + with open(inputFilename, 'r') as reader: + inputlines = reader.readlines() -lines=np.array(lines) -commentlinesMask=np.array(commentlinesMask) -datalines = lines[commentlinesMask==False] #get lines with data -header = datalines[0] #header is the first non comment line + lines+=inputlines + + #Check Headers + _lines,_header,_,_,_,_,_,_,_=filterLines(inputlines) #per-file pass, only the stripped lines and the header are needed here + + if (header==""): #is first header + header=_header -headerSize = len(header.split(',')) #how many elements are expected per line + assert header==_header, "Header is different!" #all input files must share the header of the first file -datalinesSize = [len(x.split(',')) for x in datalines] #count arraysize for every dataline -datalinesOK = datalines[np.array(datalinesSize)==headerSize] -datalinesFail = datalines[np.array(datalinesSize)!=headerSize] -#datalinesSizeBin = dict((x,datalinesSize.count(x)) for x in set(datalinesSize)) #binning -#normalSize = max(datalinesSizeBin, key=datalinesSizeBin.get) #get normal element count by highest bin + _timestamp=getTimestamp(_lines) #creation timestamp of this file + print("Timestamp="+str(_timestamp)) + _linesStarttime=[_timestamp for x in inputlines] #create as many entries with start timestamp as there are lines in the current file -linesSize = [len(x.split(',')) for x in lines] #count arraysize for every dataline -linesOK = np.array(linesSize)==headerSize #mask for okay lines (valid for data lines) + linesStarttime+=_linesStarttime + + print("Line in file="+str(len(inputlines))) + +assert len(lines)==len(linesStarttime), "Length of lines and linesStarttime does not match" + +linesStarttime=np.array(linesStarttime) #numpy array so filterLines can apply boolean masks to it +lines,header,commentlinesMask,datalines,datalinesFail,datalinesOK,headerSize,linesOK,linesStarttime=filterLines(lines,linesStarttime) print("Found "+str(len(lines))+" lines") print(str(np.sum(commentlinesMask))+" comments") @@ -47,20 +119,9 @@ print(str(len(datalinesFail))+" Datalines Failed") 
print(str(len(datalinesOK))+" Datalines OK") print("Header Size is "+str(headerSize)) -timestampline=-1 -timestampfound=False -while not timestampfound: - timestampline+=1 - timestampfound = (lines[timestampline].find('TIMESTAMP:')!=-1) - -timestamp=int(lines[timestampline].split('TIMESTAMP:')[1]) #timestamp when file was created - -print("Found Timestamp in line "+str(timestampline)) - - - - +timestamp=getTimestamp(lines) filetime = time.strftime('%Y%m%d_%H%M%S', time.localtime(timestamp)) + if outputFilename is None: outputFilename = filetime+".csv" @@ -77,11 +138,15 @@ print("Size commentlinesMask="+str(len(commentlinesMask))) print("Size datalines="+str(len(datalines))) print("Size linesOK="+str(len(linesOK))) +header="timestamp,"+header #add timestamp column + +writelines = [str(linesStarttime[i]+float(x.split(',')[0]))+","+x for i,x in enumerate(datalinesOK)] #add file timestamp to line time and add column to data linesWritten = 0 if ok: with open(outputFilename, 'w') as writer: - for i,line in enumerate(datalinesOK): + writer.write(header+"\n") #write header + for i,line in enumerate(writelines): writer.write(line+"\n") linesWritten+=1