add logfile concatenation and timestamp column

This commit is contained in:
interfisch 2023-07-12 20:17:27 +02:00
parent 33a5669545
commit 07ea359c53

View file

@ -4,42 +4,114 @@ import time
import argparse
parser = argparse.ArgumentParser(description='Copys, renames and fixes logfiles written by bobbycar sd logger.')
parser.add_argument('input', type=argparse.FileType('r'))
parser.add_argument('output', nargs='?', type=argparse.FileType('w'))
parser.add_argument('--input', type=argparse.FileType('r'), nargs='+')
parser.add_argument('--output', nargs='?', type=argparse.FileType('w'))
args = parser.parse_args()
ok=True
inputFilename=args.input.name
def getTimestamp(plines):
    """Return the integer timestamp from the first line containing 'TIMESTAMP:'.

    The value is whatever follows the 'TIMESTAMP:' marker on that line,
    converted with int() (surrounding whitespace/newline is tolerated by int()).

    Parameters:
        plines: iterable of text lines (list or numpy array of strings).

    Returns:
        int: the parsed timestamp (per the original comment: the timestamp
        when the file was created).

    Raises:
        SystemExit: if no line contains 'TIMESTAMP:'. An error message is
            printed first. Previously this case ran off the end of the
            list and raised IndexError, because the "not found" check could
            never trigger.
        ValueError: if the text after the marker is not a valid integer.
    """
    for line in plines:
        if 'TIMESTAMP:' in line:
            # Everything after the marker is the timestamp value.
            return int(line.split('TIMESTAMP:')[1])

    # No marker anywhere in the input: report and stop with a failure status.
    print("Error: Timestamp not found!")
    raise SystemExit(1)
def filterLines(plines,plinesStarttime=None):
    """Split raw log lines into comments, header, valid and invalid data lines.

    Lines starting with '#' are comments. The first non-comment line is taken
    as the CSV header; a data line is "OK" when it has the same number of
    comma-separated fields as the header. Lines equal to the header are
    dropped from the OK data lines.

    Parameters:
        plines: sequence of text lines; a trailing newline is stripped from each.
        plinesStarttime: optional numpy array with one entry per input line.
            When given, it is filtered in lockstep with the OK data lines.

    Returns:
        Tuple of (plines, pheader, pcommentlinesMask, pdatalines,
        pdatalinesFail, pdatalinesOK, pheaderSize, plinesOK, plinesStarttime):
          plines            - numpy array of the stripped lines
          pheader           - the header line
          pcommentlinesMask - numpy mask, True for comment lines
          pdatalines        - numpy array of all non-comment lines
          pdatalinesFail    - numpy array of data lines with a wrong field count
          pdatalinesOK      - list of well-formed data lines without header lines
          pheaderSize       - number of fields in the header
          plinesOK          - numpy mask over plines, True where the field
                              count matches the header
          plinesStarttime   - list aligned with pdatalinesOK, or None if no
                              start times were passed in
    """
    stripped = [raw.rstrip("\n") for raw in plines] #remove \n
    comment_flags = [text.startswith('#') for text in stripped]
    plines = np.array(stripped)
    pcommentlinesMask = np.array(comment_flags)
    is_data = np.logical_not(pcommentlinesMask) #everything that is not a comment

    pdatalines = plines[is_data]
    if plinesStarttime is not None:
        plinesStarttime = plinesStarttime[is_data] #keep start times of data lines only

    pheader = pdatalines[0] #header is the first non comment line
    pheaderSize = len(pheader.split(','))

    #field count per data line, compared once against the header
    field_counts = np.array([len(row.split(',')) for row in pdatalines])
    well_formed = field_counts == pheaderSize

    candidates = pdatalines[well_formed]
    pdatalinesFail = pdatalines[field_counts != pheaderSize]

    if plinesStarttime is not None:
        plinesStarttime = plinesStarttime[well_formed]
        #drop the entries that belong to (repeated) header lines
        plinesStarttime = [stamp for stamp, row in zip(plinesStarttime, candidates) if row != pheader]
    pdatalinesOK = [row for row in candidates if row != pheader] #exclude header from data lines

    all_counts = np.array([len(row.split(',')) for row in plines])
    plinesOK = all_counts == pheaderSize #only meaningful for data lines

    return plines,pheader,pcommentlinesMask,pdatalines,pdatalinesFail,pdatalinesOK,pheaderSize,plinesOK,plinesStarttime
inputFilenames=[x.name for x in args.input]
outputFilename=None
if args.output is not None:
outputFilename=args.output.name
print("Input Filename: "+str(inputFilename))
with open(inputFilename, 'r') as reader:
lines = reader.readlines()
lines = [x.rstrip("\n") for x in lines] #remove \n
commentlinesMask = [True if x.startswith('#') else False for x in lines] #generate mask for lines with comments
lines=[]
linesStarttime=[] #offset for every line with timestamp. will be combined to new column
header=""
for inputFilename in inputFilenames:
print("Reading "+str(inputFilename))
inputlines=[]
with open(inputFilename, 'r') as reader:
inputlines = reader.readlines()
lines=np.array(lines)
commentlinesMask=np.array(commentlinesMask)
datalines = lines[commentlinesMask==False] #get lines with data
header = datalines[0] #header is the first non comment line
lines+=inputlines
#Check Headers
_lines,_header,_,_,_,_,_,_,_=filterLines(inputlines)
if (header==""): #is first header
header=_header
headerSize = len(header.split(',')) #how many elements are expected per line
assert header==_header, "Header is different!"
datalinesSize = [len(x.split(',')) for x in datalines] #count arraysize for every dataline
datalinesOK = datalines[np.array(datalinesSize)==headerSize]
datalinesFail = datalines[np.array(datalinesSize)!=headerSize]
#datalinesSizeBin = dict((x,datalinesSize.count(x)) for x in set(datalinesSize)) #binning
#normalSize = max(datalinesSizeBin, key=datalinesSizeBin.get) #get normal element count by highest bin
_timestamp=getTimestamp(_lines)
print("Timestamp="+str(_timestamp))
_linesStarttime=[_timestamp for x in inputlines] #create as many entries with start timestamp as there are lines in the current file
linesSize = [len(x.split(',')) for x in lines] #count arraysize for every dataline
linesOK = np.array(linesSize)==headerSize #mask for okay lines (valid for data lines)
linesStarttime+=_linesStarttime
print("Line in file="+str(len(inputlines)))
assert len(lines)==len(linesStarttime), "Length of lines and linesStarttime does not match"
linesStarttime=np.array(linesStarttime)
lines,header,commentlinesMask,datalines,datalinesFail,datalinesOK,headerSize,linesOK,linesStarttime=filterLines(lines,linesStarttime)
print("Found "+str(len(lines))+" lines")
print(str(np.sum(commentlinesMask))+" comments")
@ -47,20 +119,9 @@ print(str(len(datalinesFail))+" Datalines Failed")
print(str(len(datalinesOK))+" Datalines OK")
print("Header Size is "+str(headerSize))
timestampline=-1
timestampfound=False
while not timestampfound:
timestampline+=1
timestampfound = (lines[timestampline].find('TIMESTAMP:')!=-1)
timestamp=int(lines[timestampline].split('TIMESTAMP:')[1]) #timestamp when file was created
print("Found Timestamp in line "+str(timestampline))
timestamp=getTimestamp(lines)
filetime = time.strftime('%Y%m%d_%H%M%S', time.localtime(timestamp))
if outputFilename is None:
outputFilename = filetime+".csv"
@ -77,11 +138,15 @@ print("Size commentlinesMask="+str(len(commentlinesMask)))
print("Size datalines="+str(len(datalines)))
print("Size linesOK="+str(len(linesOK)))
header="timestamp,"+header #add timestamp column
writelines = [str(linesStarttime[i]+float(x.split(',')[0]))+","+x for i,x in enumerate(datalinesOK)] #add file timestamp to line time and add column to data
linesWritten = 0
if ok:
with open(outputFilename, 'w') as writer:
for i,line in enumerate(datalinesOK):
writer.write(header+"\n") #write header
for i,line in enumerate(writelines):
writer.write(line+"\n")
linesWritten+=1