XML to CSV using XSLT

Hi,
I am new to Python, so I would appreciate some suggestions on this code.
We receive huge XML files (about 150 files per hour, of which roughly 20 are 5 MB each) and we convert them to CSV using XSLT.
The XML is very complex — it has various sub-arrays and child tags — so we had to create a new XSLT for each array.
As a result, for one master tag we ended up creating 100 XSLTs.

We have written a while loop to process each XSLT into a CSV, so the Python code runs the 100 XSLTs sequentially to create 100 CSV files. Is there any way I can run the code in parallel?

import lxml.etree as ET
import sys, getopt
import os.path
import gzip

# timeit is used to benchmark performance

from timeit import default_timer as timer

# Helper to detect gzipped input — see Stack Overflow: "How to tell if a file is gzip compressed?"

def is_gz_file(filepath):
    """Return True if the file at *filepath* starts with the gzip magic number.

    Only the first two bytes are read, so this is cheap even for huge files.
    """
    # Every gzip stream begins with the two magic bytes 0x1f 0x8b.
    with open(filepath, 'rb') as test_f:
        return test_f.read(2) == b'\x1f\x8b'

def main(argv):
    """Apply one or more XSLT stylesheets to a single XML input.

    Parses *argv* for one XML file (``-x``, optionally gzip-compressed)
    and parallel lists of stylesheets (``-s``) and output paths (``-o``),
    then parses the XML once and runs every stylesheet over it in turn,
    writing each non-empty result to the matching output file.

    Exits with status 2 on bad arguments or a missing XML input.
    """
    usage = 'parse.py -x <xmlfile> -s <xslfile> -o <outfile> [ -s <xslfile> -o <outfile> ... ]'

    # Read command-line arguments: one xml input, parallel lists of
    # stylesheets and output files (each -s is paired with the -o at
    # the same position).
    xmlfile = ''
    xslfile = []
    outfile = []
    try:
        opts, args = getopt.getopt(argv, "hx:s:o:", ["xfile=", "sfile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-x", "--xfile"):
            xmlfile = arg
        elif opt in ("-s", "--sfile"):
            xslfile.append(arg)
        elif opt in ("-o", "--ofile"):
            outfile.append(arg)

    # Check input xml file can be read
    if not os.path.isfile(xmlfile):
        print('cannot open xml', xmlfile)
        sys.exit(2)

    # Parse the XML exactly once; the parsed tree is reused for every
    # stylesheet below, which is the expensive step worth amortising.
    if is_gz_file(xmlfile):
        # Expand, then parse xml input
        start = timer()
        exp = gzip.open(xmlfile, 'rb')
        print(timer() - start, 'gzip.open', xmlfile)

        start = timer()
        xml = ET.parse(exp)
        print(timer() - start, 'ET.parse', xmlfile)
    else:
        # Parse xml input directly
        start = timer()
        xml = ET.parse(xmlfile)
        print(timer() - start, 'ET.parse', xmlfile)

    # Iterate stylesheet / output pairs; stop at the shorter list so an
    # unpaired -s or -o is ignored rather than crashing.
    words = xmlfile.split('/')
    for i in range(min(len(xslfile), len(outfile))):
        # Check input xslt can be read; skip this pair rather than abort.
        if not os.path.isfile(xslfile[i]):
            print('cannot open xslt', xslfile[i])
            continue

        start = timer()
        xsl = ET.parse(xslfile[i])
        print(timer() - start, 'ET.parse', xmlfile, xslfile[i])

        start = timer()
        transform = ET.XSLT(xsl)
        print(timer() - start, 'ET.XSLT', xmlfile, xslfile[i])

        # var1 hands the bare input file name (last path component) to
        # the stylesheet as a string parameter.
        start = timer()
        newdom = transform(xml, var1=ET.XSLT.strparam(words[-1]))
        result = str(newdom)
        print(timer() - start, 'transform', xmlfile, xslfile[i], len(result))

        # Only write non-empty transformation results.
        if result:
            with open(outfile[i], 'wb') as f:
                start = timer()
                f.write(result.encode('utf-8'))
                print(timer() - start, 'f.write', xmlfile, xslfile[i])

if name == “main”:

You should look into multiprocessing.Pool, concurrent.futures.ProcessPoolExecutor, or the Joblib distribution on PyPI.