Process multiple CSV files

arjunaram · April 6, 2023, 7:41am

Can you let me know how I can process multiple CSV files listed in the input.txt file? Currently, I am processing a single file, but I need to process multiple files.

Input files :
cat input.txt

US_input1.csv
US_input2.csv
US_input3.csv
US_input4.csv
US_input5.csv
US_input6.csv
US_input7.csv
US_input8.csv

#writing the column name

# Only the header is used below, so reading a single data row is enough.
csvFile = pandas.read_csv("US_input1.csv", sep="|", nrows=1)
# Write every column name for which search("Key", ...) finds no match,
# upper-cased, one per line (assumes `search` is `re.search`).
# The with-statement closes output.txt even if a write fails part-way.
with open('output.txt', 'w') as mfile:
    for col in csvFile.columns:
        if not search("Key", col):
            mfile.write("%s\n" % col.upper())

kknechtel · April 6, 2023, 9:19am

Read the txt file in order to get the file names, reading each line separately, without newlines. Then use another loop around the code that you have, looping over those file names, and using them for the csvFile = pandas.read_csv("US_input1.csv", sep="|", nrows=1) line instead of the hard-coded name.

However, it’s not necessary to prepare a list ahead of time. Instead, loop directly over the file (this will give you a line from the file, with the newline, each time). Take off the newline and use the result.

This will still run into a problem: we cannot repeatedly open and close the output.txt file for writing, because the "w" mode means that the existing file will be erased each time and start over. Instead, we should open it ahead of time.

Thus:

with open('input.txt') as filenames, open('output.txt', 'w') as mfile:
    for filename in filenames:
        filename = filename.strip()
        if not filename:
            # A blank line in input.txt is not a file name; skip it instead
            # of handing an empty path to read_csv.
            continue
        # Only the header is used, so one data row is enough.
        csvFile = pandas.read_csv(filename, sep="|", nrows=1)
        # Write each column name for which search("Key", ...) finds no
        # match, upper-cased, one per line.
        for col in csvFile.columns:
            if not search("Key", col):
                mfile.write(col.upper() + "\n")

dstromberg · April 8, 2023, 4:24am

I’m not 100% clear on what you mean by “process” a file, but if you need to handle the first row of each file, then the second row of each file, and so on, then you may want to append each csvFile.columns to a list_, and:
for row_of_rows in zip(*list_):

Then row_of_rows[0] will be the rows of the first file, row_of_rows[1] the rows of the second file, and so on to the end of the list.

arjunaram · April 8, 2023, 4:53pm

It works now.

arjunaram · April 13, 2023, 7:50am

Currently, I am processing all the input files together. Can you let me know how to display the file name as well?

US_input1.csv
US_input2.csv
US_input3.csv
US_input4.csv
US_input5.csv
US_input6.csv
US_input7.csv
US_input8.csv

#writing the column name
# Upper-cased column names already written to colfile.txt. The names
# themselves are stored (not their hashes) so that a hash collision can
# never hide a column, and names differing only in case are written once.
seen = set()
with open('inputfile.txt') as filenames, open('colfile.txt', 'w') as mfile:
    for filename in filenames:
        filename = filename.strip()
        if not filename:
            # Skip blank lines instead of passing an empty path to read_csv.
            continue
        # Only the header is used, so one data row is enough.
        csvFile = pandas.read_csv(filename, sep="|", nrows=1)
        for col in csvFile.columns:
            COL = col.upper()
            # NOTE(review): assuming `search` is `re.search`, an empty
            # pattern matches every string, so this condition is always
            # false and nothing is written. The real pattern looks like it
            # was removed before posting -- confirm and restore it.
            if not search("", COL):
                if COL not in seen:
                    mfile.write(COL + "\n")
                    seen.add(COL)

# Read both files completely; the with-statements close them on exit.
with open('colfile.txt', 'r') as file1:
    lines5 = file1.readlines()

with open('measlisttable.txt', 'r') as file2:
    lines10 = file2.readlines()

# Compare the entries without surrounding whitespace/newlines: otherwise a
# last line that lacks a trailing newline ("X") would never equal the same
# entry followed by one ("X\n"). Blank lines are ignored.
lines5_set = {line.strip() for line in lines5 if line.strip()}
lines10_set = {line.strip() for line in lines10 if line.strip()}

# Entries present in colfile.txt but absent from measlisttable.txt.
missing_meas = lines5_set.difference(lines10_set)

# Sorted so that output.txt is identical from run to run (set iteration
# order is not stable for strings).
with open('output.txt', 'w') as sfile:
    for line6 in sorted(missing_meas):
        sfile.write("%s\n" % line6)

#send email to user

# Host name passed to smtplib.SMTP().
SERVER1 = "user@mail.com"
TO = ["test@mail.com"]
CC = ["test@mail.com"]
FROM = "test@mail.com"
SUBJECT = "extra field"

# NOTE(review): this reads 'input.txt', while the snippet that builds
# colfile.txt reads 'inputfile.txt' -- confirm which name is the right one.
email_file = 'output.txt'
with open(email_file, "r") as fp:
    TEXT_LIST = fp.readlines()
email_file1 = 'input.txt'
with open(email_file1, "r") as fp1:
    TEXT_LIST1 = fp1.readlines()

# Every line keeps its own trailing newline and gets one more in front,
# so the entries end up separated by a blank line in the mail body.
TEXT0 = "".join("\n" + line for line in TEXT_LIST1)
TEXT = "".join("\n" + line for line in TEXT_LIST)
footer_msg = " "

header0 = """
Processed Files:

"""

header = """
Below list to be added:
"""
footer = """
"""
TEXT = header0 + TEXT0 + header + TEXT + footer + footer_msg

# Prepare actual message

message = """\
From: %s
To: %s
Cc: %s
Subject: %s


%s
""" % (FROM, ", ".join(TO), ", ".join(CC), SUBJECT, TEXT)

print(message)

# Send the mail
# The "Cc:" header is only text inside the message; the server delivers to
# the addresses passed to sendmail(), so the Cc addresses must be included
# there as well. dict.fromkeys drops duplicates while keeping the order.
recipients = list(dict.fromkeys(TO + CC))
try:
    # The with-statement sends QUIT and closes the connection on exit.
    with smtplib.SMTP(SERVER1) as smtpObj:
        # Must be set before sending for the SMTP dialogue to be traced;
        # remove this line once the tracing is no longer needed.
        smtpObj.set_debuglevel(2)
        smtpObj.sendmail(FROM, recipients, message)
    print("Successfully sent email")
except (smtplib.SMTPException, OSError) as err:
    # Report why the mail could not be sent instead of hiding the cause.
    print("Error: unable to send email")
    print("Reason: %s" % err)

arjunaram · April 13, 2023, 9:47am

Can you let me know how to include the file name in the output as well?

with open('input.txt') as filenames, open('output.txt', 'w') as mfile:
    for filename in filenames:
        filename = filename.strip()
        if not filename:
            # A blank line in input.txt is not a file name; skip it instead
            # of handing an empty path to read_csv.
            continue
        # Only the header is used, so one data row is enough.
        csvFile = pandas.read_csv(filename, sep="|", nrows=1)
        # Write each column name for which search("Key", ...) finds no
        # match, upper-cased, one per line.
        for col in csvFile.columns:
            if not search("Key", col):
                mfile.write(col.upper() + "\n")

Output.Txt

Col1, US_input1.csv
Col2, US_input1.csv
Col1, US_input2.csv
Col2, US_input2.csv
Col1, US_input1.csv
Col2, US_input1.csv
Col1, US_input3.csv
Col2, US_input3.csv
Col1, US_input4.csv
Col2, US_input4.csv
Col1, US_input5.csv
Col2, US_input5.csv