Main process finishes before the subprocesses are done when using Python's multiprocessing module

The code has the following format:

from Bio import SeqIO
import os
import csv
import multiprocessing as mp

# Set the current working directory; the relative "./test.txt" paths used
# further down are resolved against it.
os.chdir('/Users/myDirec')

# Module-level accumulators filled by logResult() in the parent process.
strainNameIDsLst = []  # values from "ID" results
samplingDatesLst = []  # values from "DT" results

hostsLst = []  # third element of "DE" results
countriesLst = []  # second element of "DE" results
seqsLst = []  # joined sequence text from "SQ" results

# Numeric fields taken from the "SQ" header line, in the order logResult()
# files them.
seqLengthsLst = []
aCountsLst = []
cCountsLst = []
gCountsLst = []
tCountsLst = []

def ID(line, filHandle):
    """Extract the identifier from an ``ID`` line.

    Args:
        line: Full text of a line whose first two characters are the line
            code.
        filHandle: Unused; accepted so that every line handler can be called
            as ``handler(line, filHandle)``.

    Returns:
        A ``("ID", identifier)`` tuple, where ``identifier`` is the first
        eight characters of the stripped text that follows the line code.
    """
    return ("ID", line[2:].strip()[:8])

def DT(line, filHandle):
    """Extract the date field from a ``DT`` line.

    Args:
        line: Full text of a line whose first two characters are the line
            code.
        filHandle: Unused; accepted so that every line handler can be called
            as ``handler(line, filHandle)``.

    Returns:
        A ``("DT", date)`` tuple, where ``date`` is the first eleven
        characters of the stripped text that follows the line code
        (presumably a ``DD-MON-YYYY`` date -- confirm against the input file).
    """
    return ("DT", line[2:].strip()[:11])

def SQ(line, filHandle):
    """Parse an ``SQ`` header line and read the sequence lines that follow it.

    Args:
        line: The ``SQ`` header line. Every space-separated token made only
            of digits is collected, in order, as an integer.
        filHandle: Open text file positioned on the first sequence line.
            Lines are consumed up to and including the ``//`` terminator.

    Returns:
        ``("SQ", n0, n1, n2, n3, n4, sequence)`` where ``n0``..``n4`` are the
        first five integers found on the header line (``logResult`` stores
        them as sequence length and A, C, G, T counts) and ``sequence`` is the
        concatenated sequence text.

    Raises:
        IndexError: If the header line holds fewer than five integer tokens.
    """
    print("reading a seq and return it")
    counts = [int(item) for item in line.split(" ") if item.isdigit()]

    # Keep reading until the "//" terminator is reached.
    seqParts = []
    while True:
        rawLine = filHandle.readline()
        if not rawLine:
            # End of file without a terminator: readline() would return ""
            # forever, so stop here instead of looping endlessly.
            break
        stripped = rawLine.strip()
        if stripped == "//":
            break
        # Join every whitespace-separated token except the last one
        # (presumably a running position counter -- confirm with the format).
        seqParts.append("".join(stripped.split()[:-1]))
    print(seqParts)
    return ("SQ", counts[0], counts[1], counts[2], counts[3], counts[4],
            "".join(seqParts))

def DE(line, filHandle):
    """Parse the line that follows a ``DE`` line.

    The ``line`` argument itself is not parsed: the next line is read from
    ``filHandle`` and split on ``/`` after dropping its first two characters.

    Args:
        line: The ``DE`` line that triggered this handler (ignored).
        filHandle: Open text file positioned on the line to parse; one line is
            consumed.

    Returns:
        ``("DE", fields[2], fields[1])`` where ``fields`` are the
        ``/``-separated parts of the parsed line. ``logResult`` stores them as
        country and host respectively.

    Raises:
        IndexError: If the parsed line has fewer than three ``/``-separated
            fields.
    """
    nextLine = filHandle.readline()
    fields = nextLine[2:].strip().split("/")
    return ("DE", fields[2], fields[1])

def processWrapper(chunkStart, chunkSize, collectAll=False):
    """Run the line handlers over one chunk of ``./test.txt``.

    Args:
        chunkStart: Offset at which the chunk begins, as produced by
            ``chunkify``.
        chunkSize: Length of the chunk.
        collectAll: When false (the default) the result of the first line
            that has a handler is returned and the rest of the chunk is
            ignored. When true every handled line is processed and a list of
            all results is returned; the caller must then be prepared to
            receive a list instead of a single tuple.

    Returns:
        With ``collectAll=False``: the first handler result, or ``None`` when
        no line in the chunk has a handler. With ``collectAll=True``: a
        (possibly empty) list of handler results in file order.
    """
    print("start processWrapper function")
    # Two-character line code -> handler; each is called as (line, filHandle).
    handlers = {"ID": ID, "DT": DT, "SQ": SQ, "DE": DE}
    results = []
    chunkEnd = chunkStart + chunkSize
    with open("./test.txt") as filHandle:
        filHandle.seek(chunkStart)
        lines = filHandle.read(chunkSize).splitlines()
        print("lines length:", str(len(lines)))

        # Rewind and walk the chunk with readline() so the handle always sits
        # just after the current line. SQ and DE read their continuation
        # lines from the handle; leaving it at the end of the chunk (where
        # read() put it) would feed them unrelated lines.
        filHandle.seek(chunkStart)
        # NOTE(review): compares text-mode tell() with a byte offset, which
        # assumes a single-byte-per-character file -- the same assumption
        # seek(chunkStart) already makes.
        while filHandle.tell() < chunkEnd:
            rawLine = filHandle.readline()
            if not rawLine:
                break  # end of file
            line = rawLine.rstrip("\n")
            lineProcedure = handlers.get(line[:2])
            if lineProcedure is None:
                continue
            result = lineProcedure(line, filHandle)
            if not collectAll:
                return result
            results.append(result)
    return results if collectAll else None

#############

def chunkify(fname, size=200*200):
    """Yield ``(chunkStart, chunkSize)`` pairs that split a file on line ends.

    Each chunk spans at least ``size`` bytes and is then extended to the end
    of the line it lands in, so no line is cut in two. The chunks are
    contiguous, never empty, and together cover the file exactly; an empty
    file yields nothing.

    Args:
        fname: Path of the file to split.
        size: Minimum chunk length in bytes.

    Yields:
        Tuples of the chunk's byte offset and its length in bytes.
    """
    fileEnd = os.path.getsize(fname)
    with open(fname, 'rb') as f:
        chunkEnd = f.tell()
        # Looping on "< fileEnd" stops as soon as the file is covered; testing
        # "chunkEnd > fileEnd" after the yield produced one extra empty chunk
        # whenever a chunk happened to end exactly at the end of the file.
        while chunkEnd < fileEnd:
            chunkStart = chunkEnd
            f.seek(size, 1)
            f.readline()  # advance to the end of the current line
            # seek() may land beyond the end of the file; clamp so the
            # reported size never exceeds what is actually there.
            chunkEnd = min(f.tell(), fileEnd)
            yield chunkStart, chunkEnd - chunkStart
################################################################################
################################################################################

def logResult(result):
    """Store a handler result in the module-level result lists.

    Args:
        result: ``None`` (ignored), a result tuple whose first element is one
            of ``"ID"``, ``"DT"``, ``"SQ"`` or ``"DE"``, or a list of such
            tuples, which are stored one by one in order. Tuples with any
            other tag are ignored.
    """
    if result is None:
        return
    if isinstance(result, list):
        for item in result:
            logResult(item)
        return

    tag = result[0]
    if tag == "ID":
        strainNameIDsLst.append(result[1])
    elif tag == "DT":
        samplingDatesLst.append(result[1])
    elif tag == "SQ":
        seqLengthsLst.append(result[1])
        aCountsLst.append(result[2])
        cCountsLst.append(result[3])
        gCountsLst.append(result[4])
        tCountsLst.append(result[5])
        seqsLst.append(result[6])
    elif tag == "DE":
        countriesLst.append(result[1])
        hostsLst.append(result[2])

# Everything that starts or depends on the workers lives under this guard.
# When worker processes are started by re-importing this module (the "spawn"
# start method), unguarded top-level code would run again in every worker.
if __name__ == '__main__':
    # Keep one core free, but never ask for an empty pool on a one-core box.
    pool = mp.Pool(max(1, mp.cpu_count() - 1))
    print("cpu counts:" + str(mp.cpu_count()))

    def reportWorkerError(exc):
        # Without an error_callback a failing task is dropped silently and
        # its results simply never show up in the lists.
        print("worker failed: " + repr(exc))

    for chunkStart, chunkSize in chunkify("./test.txt"):
        pool.apply_async(processWrapper, args=(chunkStart, chunkSize),
                         callback=logResult, error_callback=reportWorkerError)
    pool.close()
    pool.join()  # blocks until every submitted chunk has been handled

    # NOTE(review): as called here, processWrapper hands back at most one
    # record per chunk, so the result lists can end up with different lengths.
    print("# seqs:" + str(len(seqsLst)))
    print(" " + str(len(strainNameIDsLst)))

After running, I got the following result:

cpu counts:10
start processWrapper function
lines length: 604
start processWrapper function
lines length: 532
start processWrapper function
lines length: 568
reading a seq and return it
[‘ttccactgcttcagacacttatgcctgttggcatcattctattggatttgattacgtcta’, ‘taatccgtttatgattgatgttcaacaatggggttttacaggtaacctacaaagcaacca’, ‘tgatctttattgtcaagtccatggtaatgcacatgtagctagttgtgatgcaatcatgac’, ‘taggtgtctagctgtccacgagtgctttgttaagcgtgttgactggactattgaatatcc’, ‘tataattggtgatgaactgaagattaatgcggcttgtagaaaggttcaacacatggttgt’, ‘taaagctgcattattagcagacaaattcccagttcttcacgacattggtaaccctaaagc’, ‘tattaagtgtgtacctcaagctgatgtagaatggaagttctatgatgcacagccttgtag’, ‘tgacaaagcttataaaatagaagaattattctattcttatgccacacattctgacaaatt’, ‘cacagatggtgtatgcctattttggaattgcaatgtcgatagatatcctgctaattccat’, ‘tgtttgtagatttgacactagagtgctatctaaccttaacttgcctggttgtgatggtgg’, ‘cagtttgtatgtaaataaacatgcattccacacaccagcttttgataaaagtgcttttgt’, ‘taatttaaaacaattaccatttttctattactctgacagtccatgtgagtctcatggaaa’, ‘acaagtagtgtcagatatagattatgtaccactaaagtctgctacgtgtataacacgttg’, ‘caatttaggtggtgctgtctgtagacatcatgctaatgagtacagattgtatctcgatgc’, ‘ttataacatgatgatctcagctggctttagcttgtgggtttacaaacaatttgatactta’, ‘taacctctggaacacttttacaagacttcagagtttagaaaatgtggcttttaatgttgt’, ‘aaataagggacactttgatggacaacagggtgaagtaccagtttctatcattaataacac’, ‘tgtttacacaaaagttgatggtgttgatgtagaattgtttgaaaataaaacaacattacc’, ‘tgttaatgtagcatttgagctttgggctaagcgcaacattaaaccagtaccagaggtgaa’, ‘aatactcaataatttgggtgtggacattgctgctaatactgtgatctgggactacaaaag’, ‘agatgctccagcacatatatctactattggtgtttgttctatgactgacatagccaagaa’, ‘accaactgaaacgatttgtgcaccactcactgtcttttttgatggtagagttgatggtca’, ‘agtagacttatttagaaatgcccgtaatggtgttcttattacagaaggtagtgttaaagg’, ‘tttacaaccatctgtaggtcccaaacaagctagtcttaatggagtcacattaattggaga’, ‘agccgtaaaaacacagttcaattattataagaaagttgatggtgttgtccaacaattacc’, ‘tgaaacttactttactcagagtagaaatttacaagaatttaaacccaggagtcaaatgga’, ‘aattgatttcttagaattagctatggatgaattcattgaacggtataaattagaaggcta’, ‘tgccttcgaacatatcgtttatggagattttagtcatagtcagttaggtggtttacatct’, ‘actgattggactagctaaacgttttaaggaatcaccttttgaattagaagattttattcc’, ‘tatggacagtacagttaaaaactatttcataacagatgcgcaaacaggttcatctaagtg’, ‘tgtgtgttctgttattgatttattacttgatgattttgttgaaataataaaatcccaaga’, 
‘tttatctgtagtttctaaggttgtcaaagtgactattgactatacagaaatttcatttat’, ‘gctttggtgtaaagatggccatgtagaaacattttacccaaaattacaatctagtcaagc’, ‘gtggcaaccgggtgttgctatgcctaatctttacaaaatgcaaagaatgctattagaaaa’, ‘gtgtgaccttcaaaattatggtgatagtgcaacattacctaaaggcataatgatgaatgt’, ‘cgcaaaatatactcaactgtgtcaatatttaaacacattaacattagctgtaccctataa’, ‘tatgagagttatacattttggtgctggttctgataaaggagttgcaccaggtacagctgt’, ‘tttaagacagtggttgcctacgggtacgctgcttgtcgattcagatcttaatgactttgt’, ‘ctctgatgcagattcaactttgattggtgattgtgcaactgtacatacagctaataaatg’, ‘ggatctcattattagtgatatgtacgaccctaagactaaaaatgttacaaaagaaaatga’, ‘ctctaaagagggttttttcacttacatttgtgggtttatacaacaaaagctagctcttgg’, ‘aggttccgtggctataaagataacagaacattcttggaatgctgatctttataagctcat’, ‘gggacacttcgcatggtggacagcctttgttactaatgtgaatgcgtcatcatctgaagc’, ‘atttttaattggatgtaattatcttggcaaaccacgcgaacaaatagatggttatgtcat’, ‘gcatgcaaattacatattttggaggaatacaaayccaattcagttgtcttcctattcttt’, ‘atttgacatgagtaaatttccccttaaattaaggggtactgctgttatgtctttaaaaga’, ‘aggtcaaatcaatgatatgattttatctcttcttagtaaaggtagacttataattagaga’, ‘aaacaacagagttgttatttctagtgatgttcttgttaacaactaaacgaacaatgtttg’, ‘tttttcttgttttattgccactagtctctagtcagtgtgttaatcttacaaccagaactc’, ‘aattaccccctgcatacactaattctttcacacgtggtgtttattaccctgacaaagttt’, ‘tcagatcctcagttttacattcaactcaggacttgttcttacctttcttttccaatgtta’, ‘cttggttccatgctatacatgtctctgggaccaatggtactaagaggtttgataaccctg’, ‘tcctaccatttaatgatggtgtttattttgcttccactgagaagtctaacataataagag’, ‘gctggatttttggtactactttagattcgaagacccagtccctacttattgttaataacg’, ‘ctactaatgttgttattaaagtctgtgaatttcaattttgtaatgatccatttttgggtg’, ‘tttattaccacaaaaacaacaaaagttggatggaaagtgagttcagagtttattctagtg’, ‘cgaataattgcacttttgaatatgtctctcagccttttcttatggaccttgaaggaaaac’, ‘agggtaatttcaaaaatcttagggaatttgtgtttaagaatattgatggttattttaaaa’, ‘tatattctaagcacacgcctattaatttagtgcgtgatctccctcagggtttttcggctt’, ‘tagaaccattggtagatttgccaataggtattaacatcactaggtttcaaactttacttg’, ‘ctttacatagaagttatttgactcctggtgattcttcttcaggttggacagctggtgctg’, ‘cagcttattatgtgggttatcttcaacctaggacttttctattaaaatataatgaaaatg’, 
‘gaaccattacagatgctgtagactgtgcacttgaccctctctcagaaacaaagtgtacgt’, ‘tgaaatccttcactgtagaaaaaggaatctatcaaacttctaactttagagtccaaccaa’, ‘cagaatctattgttagatttcctaatattacaaacttgtgcccttttggtgaagttttta’, ‘acgccaccagatttgcatctgtttatgcttggaacaggaagagaatcagcaactgtgttg’, ‘ctgattattctgtcctatataattccgcatcattttccacttttaagtgttatggagtgt’, ‘ctcctactaaattaaatgatctctgctttactaatgtctatgcagattcatttgtaatta’, ‘gaggtgatgaagtcagacaaatcgctccagggcaaactggaaagattgctgattataatt’, ‘ataaattaccagatgattttacaggctgcgttatagcttggaattctaacaatcttgatt’, ‘ctaaggttggtggtaattataattacctgtatagattgtttaggaagtctaatctcaaac’, ‘cttttgagagagatatttcaactgaaatctatcaggccggtagcacaccttgtaatggtg’, ‘ttgaaggttttaattgttactttcctttacaatcatatggtttccaacccactaatggtg’, ‘ttggttaccaaccatacagagtagtagtactttcttttgaacttctacatgcaccagcaa’, ‘ctgtttgtggacctaaaaagtctactaatttggttaaaaacaaatgtgtcaatttcaact’, ‘tcaatggtttaacaggcacaggtgttcttactgagtctaacaaaaagtttctgcctttcc’, ‘aacaatttggcagagacattgctgacactactgatgctgtccgtgatccacagacacttg’, ‘agattcttgacattacaccatgttcttttggtggtgtcagtgttataacaccaggaacaa’, ‘atacttctaaccaggttgctgttctttatcagggtgttaactgcacagaagtccctgttg’, ‘ctattcatgcagatcaacttactcctacttggcgtgtttattctacaggttctaatgttt’, ‘ttcaaacacgtgcaggctgtttaataggggctgaacatgtcaacaactcatatgagtgtg’, ‘acatacccattggtgcaggtatatgcgctagttatcagactcagactaattctcctcggc’, ‘gggcacgtagtgtagctagtcaatccatcattgcctacactatgtcacttggtgcagaaa’, ‘attcagttgcttactctaataactctattgccatacccacaaattttactattagtgtta’, ‘ccacagaaattctaccagtgtctatgaccaagacatcagtagattgtacaatgtacattt’, ‘gtggtgattcaactgaatgcagcaatcttttgttgcaatatggcagtttttgtacacaat’, ‘taaaccgtgctttaactggaatagctgttgaacaagacaaaaacacccaagaagtttttg’, ‘cacaagtcaaacaaatttacaaaacaccaccaattaaagattttggtggttttaattttt’, ‘cacaaatattaccagatccatcaaaaccaagcaagaggtcatttattgaagatctacttt’, ‘tcaacaaagtgacacttgcagatgctggcttcatcaaacaatatggtgattgccttggtg’, ‘atattgctgctagagacctcatttgtgcacaaaagtttaacggccttactgttttgccac’, ‘ctttgctcacagatgaaatgattgctcaatacacttctgcactgttagcgggtacaatca’, ‘cttctggttggacctttggtgcaggtgctgcattacaaataccatttgctatgcaaatgg’, 
‘cttataggtttaatggtattggagttacacagaatgttctctatgagaaccaaaaattga’, ‘ttgccaaccaatttaatagtgctattggcaaaattcaagactcactttcttccacagcaa’, ‘gtgcacttggaaaacttcaagatgtggtcaaccaaaatgcacaagctttaaacacgcttg’, ‘ttaaacaacttagctccaattttggtgcaatttcaagtgttttaaatgatatcctttcac’, ‘gtcttgacaaagttgaggctgaagtgcaaattgataggttgatcacaggcagacttcaaa’, ‘gtttgcagacatatgtgactcaacaattaattagagctgcagaaatcagagcttctgcta’, ‘atcttgctgctactaaaatgtcagagtgtgtacttggacaatcaaaaagagttgattttt’, ‘gtggaaagggctatcatcttatgtccttccctcagtcagcacctcatggtgtagtcttct’, ‘tgcatgtgacttatgtccctgcacaagaaaagaacttcacaactgctcctgccatttgtc’, ‘atgatggaaaagcacactttcctcgtgaaggtgtctttgtttcaaatggcacacactggt’, ‘ttgtaacacaaaggaatttttatgaaccacaaatcattactacagacaacacatttgtgt’, ‘ctggtaactgtgatgttgtaataggaattgtcaacaacacagtttatgatcctttgcaac’, ‘ctgaattagactcattcaaggaggagttagataaatattttaagaatcatacatcaccag’, ‘atgttgatttaggtgacatctctggcattaatgcttcagttgtaaacattcaaaaagaaa’, ‘ttgaccgcctcaatgaggttgccaagaatttaaatgaatctctcatcgatctccaagaac’, ‘ttggaaagtatgagcagtatataaaatggccatggtacatttggctaggttttatagctg’, ‘gcttgattgccatagtaatggtgacaattatgctttgctgtatgaccagttgctgtagtt’, ‘gtctcaagggctgttgttcttgtggatcctgctgcaaatttgatgaagacgactctgagc’, ‘cagttctcaaaggagtcaaattacattacacataaacgaacttatggatttgtttatgag’, ‘aatcttcacaattggaactgtaactttgaagcaaggtgaaatcaaggatgctactccttc’, ‘agattttgttcgcgctactgcaacgataccgatacaagcctcactccctttcggatggct’, ‘tattgttggcgttgcacttcttgctgtttttcagagcgcttccaaaatcataaccctcaa’, ‘aaagagatggcaactagcactctccaagggtgttcactttgtttgcaacttgctgttgtt’, ‘gtttgtaacagtttactcacacctyttgctcgttgctgctggccttgaagccccttttct’, ‘ctatctttatgctttagtctacttcttgcagagtataaactttgtaagaataataatgag’, ‘gctttggctttgctggaaatgccgttccaaaaacccattactttatgatgccaactattt’, ‘tctttgctggcatactaattgttacgactattgtataccttacaatagtgtaacttcttc’, ‘aattgtcattacttcaggtgatggcacaacaagtcctatttctgaacatgactaccagat’, ‘tggtggttatactgaaaaatgggaatctggagtaaaagactgtgttgtattacacagtta’, ‘cttcacttcagactattaccagctgtactcaactcaattgagtacagacactggtgttga’, ‘acatgttaccttcttcatctacaataaaattgttgatgagcctgaagaacatgtccaaat’, 
‘tcacacaatcgacggttcatccggagttgttaatccagtaatggaaccaatttatgatga’, ‘accgacgacgactactagcgtgcctttgtaagcacaagctgatgagtacgaacttatgta’, ‘ctcattcgtttcggaagagacaggtacgttaatagttaatagcgtacttctttttcttgc’, ‘tttcgtggtattcttgctagttacactagccatccttactgcgcttcgattgtgtgcgta’, ‘ctgctgcaatattgttaacgtgagtcttgtaaaaccttctttttacgtttactctcgtgt’, ‘taaaaatctgaattcttctagagttcctgatcttctggtctaaacgaactaaatattata’, ‘ttagtttttctgtttggaactttaattttagccatggcagattccaacggtactattacc’, ‘gttgaagagcttaaaaagctccttgaacaatggaacctagtaataggtttcctattcctt’, ‘acatggatttgtcttctacaatttgcctatgccaacaggaataggtttttgtatataatt’, ‘aagttaattttcctctggctgttatggccagtaactttagcttgttttgtgcttgctgct’, ‘gtttacagaataaattggatcaccggtggaattgctatcgcaatggcttgtcttgtaggc’, ‘ttgatgtggctcagctacttcattgcttctttcagactgtttgcgcgtacgcgttccatg’, ‘tggtcattcaatccagaaactaacattcttctcaacgtgccactccatggcactattctg’, ‘accagaccgcttctagaaagtgaactcgtaatcggagctgtgatccttcgtggacatctt’, ‘cgtattgctggacaccatctaggacgctgtgacatcaaggacctgcctaaagaaatcact’, ‘gttgctacatcacgaacgctttcttattacaaattgggagcttcgcagcgtgtagcaggt’, ‘gactcaggttttgctgcatacagtcgctacaggattggcaactataaattaaacacagac’, ‘cattccagtagcagtgacaatattgctttgcttgtacagtaagtgacaacagatgtttca’, ‘tctcgttgactttcaggttactatagcagagatattactaattattatgaggacttttaa’, ‘agtttccatttggaatcttgattacatcataaacctcataattaaaaatttatctaagtc’, ‘actaactgagaataaatattctcaattagatgaagagcaaccaatggagattgattaaac’, ‘gaacatgaaaattattcttttcttggcactgataacactcgctacttgtgagctttatca’, ‘ctaccaagagtgtgttagaggtacaacagtacttttaaaagaaccttgctcttctggaac’, ‘atacgagggcaattcaccatttcatcctctagctgataacaaatttgcactgacttgctt’, ‘tagcactcaatttgcttttgcttgtcctgacggcgtaaaacacgtctatcagttacgtgc’, ‘cagatcagtttcacctaaactgttcatcagacaagaggaagttcaagaactttactctcc’, ‘aatttttcttattgttgcggcaatagtgtttataacactttgcttcacactcaaaagaaa’, ‘gacagaatgattgaactttcattaattgacttctatttgtgctttttagcctttctgcta’, ‘ttccttgttttaattatgcttattatcttttggttctcacttgaactgcaagatcataat’, ‘gaaacttgtcacgcctaaacgaacatgaaatttcttgttttcttaggaatcatcaaaact’, ‘gtagctgcatttcaccaagaatgtagtttacagtcatgtactcaacatcaaccatatgta’, 
‘gttgatgacccgtgtcctattcacttctattctaaatggtatattagagtaggagctaga’, ‘aaatcagcacctttaattgaattgtgcgtggatgaggctggttctaaatcacccattcag’, ‘tacatcgatatcggtaattatacagtttcctgtttaccttttacaattaattgccaggaa’, ‘cctaaattgggtagtcttgtagtgcgttgttcgttctatgaagactttttagagtatcat’, ‘gacgttcgtgttgttttagatttcatctaaacgaacaaactaaaatgtctgataatggac’, ‘cccaaaatcagcgaaatgcaccccgcattacgtttggtggaccctcagattcaactggca’, ‘gtaaccagaatggagaacgcagtggggcgcgatcaaaacaacgtcggccccaaggtttac’, ‘ccaataatactgcgtcttggttcaccgctctcactcaacatggcaaggaagaccttaaat’, ‘tccctcgaggacaaggcgttccaattaacaccaatagcagtccagatgaccaaattggct’, ‘actaccgaagagctaccagacgaattcgtggtggtgacggtaaaatgaaagatctcagtc’, ‘caagatggtatttctactacctaggaactgggccagaagctggacttccctatggtgcta’, ‘acaaagacggcatcatatgggttgcaactgagggagccttgaatacaccaaaagatcaca’, ‘ttggcacccgcaatcctgctaacaatgctgcaatcgtgctacaacttcctcaaggaacaa’, ‘cattgccaaaaggcttctacgcagaagggagcagaggcggcagtcaagcctcttctcgtt’, ‘cctcatcacgtagtcgcaacagttcaagaaattcaactccaggcagcagtaaasgaactt’, ‘ctcctgctagaatggctggcaatggcggtgatgctgctcttgctttgctgctgcttgaca’, ‘gattgaaccagcttgagagcaaaatgtctggtaaaggccaacaacaacaaggccaaactg’, ‘tcactaagaaatctgctgctgaggcttctaagaagcctcggcaaaaacgtactgccacta’, ‘aagcatacaatgtaacacaagctttcggcagacgtggtccagaacaaacccaaggaaatt’, ‘ttggggaccaggaactaatcagacaaggaactgattayaaacattggccgcaaattgcac’, ‘aatttgcccccagcgcttcagcgttcttcggaatgtcgcgcattggcatggaagtcacac’, ‘cttcgggaacgtggttgacctacacaggtgccatcaaattggatgacaaagatccaaatt’, ‘tcaaagatcaagtcattttgctgaataagcatattgacgcatacaaaacattcccaccaa’, ‘cagagcctaaaaaggacaaaaagaagaaggctgatgaaactcaagccttaccgcagagac’, ‘agaagaaacagcaaactgtgactcttcttcctgctgcagatttggatgatttctccaaac’, ‘aattgcaacaatccatgagcagtgctgactcaactcaggcctaaactcatgcagaccaca’, ‘caaggcagatgggctatataaacgttttcgcttttccgtttacgatatatagtctactct’, ‘tgtgcagaatgaattctcgtaactacatagcacaagtagatgtagttaactttaatctca’, ‘catagcaatctttaatcagtgtgtaacattagggaggacttgaaagagccaccacatttt’, ‘caccgaggccacgcggagtacgatcgagtgtacagtgaacaatgctagggagagctgcct’, ‘atatggaagagccctaatgtgtaaaattaattttagtagtgctatccccatgtgatttta’, 
‘atagcttcttaggagaatgacaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa’]start processWrapper function
lines length: 188

seqs:1

2
0
Traceback (most recent call last):
File "/Users/myDirec/myFile.py", line 180, in <module>
inputMetaTsv.writerow([strainNameIDsLst[i]] + [samplingDatesLst[i]] + [hostsLst[i]] + [countriesLst[i]] + [seqLengthsLst[i]] + [aCountsLst[i]] + [cCountsLst[i]] + [tCountsLst[i]] + [gCountsLst[i]])
IndexError: list index out of range
[Finished in 1.872s]
reading a seq and return it
reading a seq and return it
reading a seq and return it

It looks like the main process finished before the subprocesses had finished their work. Could anyone please point me to some references or ideas? Many thanks.