Need help improving the performance of this Python code

My Python script takes 18 seconds to insert 500,000 (5 lakh) records into a Couchbase database with 5 threads, whereas the same logic implemented in C++ takes 9 seconds to insert the same records. Can anyone please help me improve the Python code's performance?

import random 
import multiprocessing 
import string 
from couchbase import Couchbase
from couchbase.cluster import Cluster, ClassicAuthenticator,PasswordAuthenticator
from couchbase.exceptions import CouchbaseError
from couchbase.n1ql import N1QLQuery
import logging,time
import ConfigParser
import io
import json
from couchbase.bucket import Bucket

# Module-level bookkeeping for worker handles.
threads = []  # NOTE(review): never used anywhere in this file — candidate for removal
procs = []  # multiprocessing.Process handles appended by input_check()

def input_check():
    """Spawn ``ThreadCount`` worker processes running conf_thread and wait for all.

    Each worker gets its index ``x`` so it can build unique document keys.
    Uses a local list of process handles: the original appended to the
    module-level ``procs`` list, so a second call would have re-joined
    (already-finished) processes left over from the previous run.
    """
    workers = []
    for x in range(ThreadCount):
        p = multiprocessing.Process(target=conf_thread, args=(x,))
        workers.append(p)
        p.start()
    # Wait for all worker processes to finish
    for p in workers:
        p.join()

def conf_thread(x):
    """Worker body: run ``NumofDoc`` upsert/get operations against Couchbase.

    x -- worker index, used to build unique document keys and in the report.

    Reads module-level config (couchbaseDBIP, couchbasePassword, NumofDoc,
    OP_TYPE).  The operation mix is selected by OP_TYPE:
        1 -> 100% upserts, 2 -> 100% reads, 3 -> 50/50 mix.

    Raises ValueError for any other OP_TYPE (the original crashed with a
    NameError on the first use of ``updateproportion`` instead).
    """
    bucket = Bucket(couchbaseDBIP, password=couchbasePassword)
    bucket.n1ql_timeout = 10000
    bucket.timeout = 10000

    # 1000-digit random payload shared by every document this worker writes.
    rand_str = ''.join(random.choice(string.digits) for _ in range(1000))

    if OP_TYPE == 1:
        updateproportion, readproportion = 100, 0
    elif OP_TYPE == 2:
        updateproportion, readproportion = 0, 100
    elif OP_TYPE == 3:
        updateproportion, readproportion = 50, 50
    else:
        raise ValueError("OP_TYPE must be 1, 2 or 3, got %r" % (OP_TYPE,))

    # 100-slot schedule cycled via j % 100 below: 1 = upsert, 2 = get.
    opsSequence = [1] * updateproportion + [2] * readproportion

    # Hoist loop-invariant work out of the hot loop: the document body is
    # identical for every upsert, so there is no need to rebuild the dict
    # (and re-stringify x) on each of the NumofDoc iterations.
    doc_body = {"Thread_id": str(x), "KTAB": "INSERT", "SyncBuffer": rand_str}
    key_prefix = "Python_" + str(x) + "_"

    start_time = time.time()
    readCount = 0
    insertCount = 0
    for j in range(NumofDoc):  # NumofDoc e.g. 100000 (the original used a C++
        # style "//" comment here, which is a syntax error in Python)
        optype = opsSequence[j % 100]
        if optype == 1:
            bucket.upsert(key_prefix + str(insertCount), doc_body)
            insertCount += 1
        else:
            # optype == 2: read back one of the first 5000 keys this worker wrote.
            getDocument(bucket, key_prefix + str(readCount % 5000))
            readCount += 1

    timediff = time.time() - start_time
    totalOps = insertCount + readCount
    # Guard the report against NumofDoc == 0 / a zero-length run.
    avgLatency = timediff / totalOps if totalOps else 0.0
    opsPerSec = totalOps / timediff if timediff else 0.0
    print("THREAD_ID :" + str(x) + ",TOTAL WRITE : " + str(insertCount)
          + ", TOTAL READ : " + str(readCount)
          + ", TOTAL OPERATION TIME : " + str(timediff)
          + ", AVG_LATENCY = " + str(avgLatency)
          + ", OPS_PER_SECOND =" + str(opsPerSec))


# --- Script entry point: load config.ini, echo settings, launch workers. ---
#
# NOTE: the original annotated these lines with C++-style "//" comments.
# In Python, "//" is the floor-division OPERATOR, so
#     NumofDoc = config.getint(...) // 100000
# silently divided the configured document count by 100000, ThreadCount was
# divided by 5, and the OP_TYPE line evaluated a tuple referencing the
# undefined names `insert`/`get` (NameError).  Real comments use "#".
#
# Close the file as soon as its contents are read (we only need the text).
with open("config.ini") as f:
    sample_config = f.read()

config = ConfigParser.RawConfigParser(allow_no_value=True)
config.readfp(io.BytesIO(sample_config))
NumofDoc = config.getint("couchbase", "NumofDoc")        # e.g. 100000
couchbaseDBIP = config.get("couchbase", "ServerIp")
couchbaseUserName = config.get("couchbase", "Username")
couchbasePassword = config.get("couchbase", "Password")
couchbaseBucketName = config.get("couchbase", "BucketName")
ThreadCount = config.getint("couchbase", "ThreadCount")  # e.g. 5
Port = config.getint("couchbase", "Port")
OP_TYPE = config.getint("couchbase", "OP_TYPE")          # 1-insert, 2-get, 3-mixed

print("Config File name Passed :  Config.ini")
print("ThreadCount : " + str(ThreadCount))
print("Server IP : " + couchbaseDBIP)
print("Number of Requests per thread : " + str(NumofDoc))
input_check()

General remarks:

  • You can exit the with statement loading “config.ini” as soon as you’ve read it (i.e. immediately after `f.read()`). This closes the file as early as possible.
  • If it’s already in C++, why re-write it in Python? Especially if you have a performance hit. You may want the configuration in Python, but you can use Cython to access your faster C++ function
  • You should time each section individually to see what’s the slowest

I can’t run your snippet, but my guess would be the 100000-iteration for-loop with its many function calls (there are likely many more function calls in bucket.upsert). My first suggestion is call the upsert asynchronously (eg in a coroutine/generator, async (await) function, or queued thread/process)

1 Like

+1 on using Cython here if you already have a working C++ implementation. Especially for the for loop with 100000 iterations, which I’m guessing is the main bottleneck, you’re likely not going to be able to match the C++ performance with CPython alone (unless you write a C-API extension, but that would be substantially more work). I’d recommend reading the following guide on “Using C++ in Cython”.

1 Like