Hi, any help is appreciated. I am trying to delete the first row in my CSV files before running the full script. I added the `# Delete First Row` line (before the line that defines the columns), but I am receiving a `NameError: name 'df' is not defined` error, and I can't manage to resolve it. Any ideas? Thanks.
# Import libraries
import json
import pandas as pd
import os
import sys
# Specify data directories
# Directory holding the raw CSV exports, and the names of the entries in it.
locfile = "c:/files/test/"
raw_files = os.listdir(locfile)
# Delete First Row
# NOTE: the row cannot be dropped here. No DataFrame has been created yet at
# this point in the script, which is exactly what raised
# "NameError: name 'df' is not defined". The drop has to be done once per
# file, immediately after pd.read_csv() has produced that file's DataFrame,
# e.g. `df = df.iloc[1:]`.
# Define columns - these are taken from the JSON attributes
# Output column names, in the same order as the tuple built by convDelim.
# The typographic quotes from the original paste are replaced with plain
# ASCII quotes so the list literal is valid Python; the names are unchanged.
cols = [
    "Id",
    "Activity",
    "CreationTime",
    "OrganizationId",
    "RecordType",
    "WorkspaceId",
    "WorkSpaceName",
    "Workload",
    "DataflowType",
    "DatasetId",
    "DatasetName",
    "IsSuccess",
    "ObjectId",
    "ItemName",
    "ReportId",
    "ReportName",
    "ReportType",
    "UserKey",
    "UserId",
    "UserAgent",
    "ClientIP",
    "AcitveUser",
]
# In[306]:
# Subroutine that accepts JSON data and returns it as a table-format array
def convDelim(j):
    """Flatten one JSON audit record into a single table row.

    Args:
        j: JSON text describing one record (a JSON object).

    Returns:
        A one-element list holding a 22-field tuple, ordered as: Id,
        Activity, CreationTime, OrganizationId, RecordType, WorkspaceId,
        WorkSpaceName, Workload, DataflowType, DatasetId, DatasetName,
        IsSuccess, ObjectId, ItemName, ReportId, ReportName, ReportType,
        UserKey, UserId, UserAgent, ClientIP, AcitveUser.
        When *j* cannot be parsed as a JSON object, the tuple holds 22
        empty strings instead, so one bad record does not abort the run.
    """
    try:
        js = json.loads(j, strict=False)
        # No DataFrame work belongs here: the previous reference to the
        # global `df` raised NameError whenever no such global existed, and
        # the bare `except` silently turned that into an all-blank row.
        return [(
            js.get("Id"),
            js.get("Activity"),
            js.get("CreationTime"),
            js.get("OrganizationId"),
            # Missing/falsy RecordType becomes "", anything else is stringified.
            str(js.get("RecordType") or ""),
            js.get("WorkspaceId") or "",
            js.get("WorkSpaceName") or "",
            js.get("Workload") or "",
            js.get("DataflowType") or "",
            js.get("DatasetId") or "",
            js.get("DatasetName") or "",
            # Always stringified, so a missing value is the text "None".
            str(js.get("IsSuccess")),
            js.get("ObjectId") or "",
            js.get("ItemName") or "",
            js.get("ReportId") or "",
            js.get("ReportName") or "",
            js.get("ReportType") or "",
            js.get("UserKey"),
            js.get("UserId"),
            js.get("UserAgent"),
            js.get("ClientIP"),
            # NOTE(review): "AcitveUser" looks like a typo for "ActiveUser";
            # confirm against the real payload before changing the key.
            js.get("AcitveUser"),
        )]
    except (TypeError, ValueError, AttributeError):
        # TypeError: input is not text (e.g. NaN for an empty cell).
        # ValueError: malformed JSON (json.JSONDecodeError subclasses it).
        # AttributeError: valid JSON that is not an object (no .get()).
        # The blank row has 22 fields to match the success path; the old
        # fallback had only 21.
        return [("",) * 22]
# In[304]:
# Parse every CSV in the data directory and collect one frame per file.
frames = []
for file in raw_files:
    if not file.endswith("csv"):
        continue
    print("Processing: " + file)
    df = pd.read_csv(locfile + file)
    # print(df.info())

    # Delete First Row. This must happen here, after read_csv has created
    # df for this file; doing it before the loop raises NameError.
    # NOTE(review): this drops the first *data* row. If the unwanted line
    # sits above the header instead, use pd.read_csv(..., skiprows=1).
    df = df.iloc[1:]

    # Normalise header names before looking up the AuditData column.
    df.columns = df.columns.str.strip()

    # convDelim returns a one-element list; keep just the row tuple.
    rows = [convDelim(x)[0] for x in df["AuditData"]]
    frames.append(pd.DataFrame(data=rows, columns=cols))

# Concatenate once at the end rather than once per file, then de-duplicate.
if frames:
    final_df = pd.concat(frames).drop_duplicates()
else:
    final_df = pd.DataFrame(columns=cols)
# In[303]:
# Write the combined rows to a single CSV (header row, no index column).
# NOTE(review): the output lands in the same directory that is scanned for
# input files and its name ends in "csv", so a second run will try to read
# it as input. Consider writing it elsewhere or skipping it when scanning.
final_df.to_csv(locfile + "data_2022.csv", index=False, header=True)