I did a simple test that puts a UUID and a random number together as the key of a dict, with another random number as the value. If I put 10 thousand entries in the dict and sort it, it takes about 2 minutes with CPython or PyPy. Numba cannot run this script.
But an equivalent program in C++ takes less than 1 second. So is Python normally hundreds of times slower than C++?
fff8534b-ce71-430d-a58e-c38026756af2 0.87239 0.257589
fffad9fa-eb19-45e4-ba5e-2d303efbe5b3 0.928528 0.761864
fffeae1d-7b51-4fb0-a02e-48c51d00422d 0.816001 0.522104
…
EDIT:
My code was wrong. If I move sorted() out of the loop, it is as fast as the C++ version, even when sorting 1 million entries in the dict.
import random
import math
import string
import uuid
from numba import jit
class K:
    """Composite dictionary/sort key: an identifier `a` plus a float `b`.

    Instances hash, compare equal, and order exactly like the tuple
    ``(a, b)``, so they can be used both as dict keys and with sorted().
    """

    def __init__(self, a='', b=0.0):
        self.a = a
        self.b = b

    def __hash__(self):
        # Must stay consistent with __eq__: hash the same (a, b) pair.
        return hash((self.a, self.b))

    def __eq__(self, other):
        return self.a == other.a and self.b == other.b

    def __lt__(self, other):
        # Lexicographic order, identical to tuple comparison on (a, b).
        if self.a != other.a:
            return self.a < other.a
        return self.b < other.b
def f():
    """Insert 10_000 entries keyed by K(uuid, random) and sort the dict once.

    Returns the number of entries, mirroring the C++ f() so the two
    versions can be compared directly.

    NOTE: the original had @jit(nopython=True) here, but numba's nopython
    mode cannot compile dicts keyed by arbitrary Python objects, nor the
    uuid module, so the decorated script would not run at all.
    """
    d = {}
    for _ in range(10_000):
        d[K(uuid.uuid4(), random.random())] = random.random()
    # sorted() runs exactly once, after the loop -- calling it per
    # insertion is what made the original script take minutes.
    d = dict(sorted(d.items(), key=lambda item: item[0]))
    return len(d)


f()
The C++ version is as follows:
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <map>
#include <tuple>
using namespace std;
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/uuid/uuid_generators.hpp>
// Composite map key: a UUID rendered as text plus a random double.
// Ordered lexicographically on (a, b), matching the Python class K.
struct K {
    std::string a;
    double b;

    // Member-init list with a move: at most one copy for lvalue callers,
    // none for rvalues. The original default-constructed then copy-assigned.
    K(std::string a, double b) : a(std::move(a)), b(b) {}

    // std::tie builds a tuple of references, so comparing keys no longer
    // copies both strings the way make_tuple did on every map operation.
    friend bool operator==(const K& lhs, const K& rhs) {
        return std::tie(lhs.a, lhs.b) == std::tie(rhs.a, rhs.b);
    }
    friend bool operator<(const K& lhs, const K& rhs) {
        return std::tie(lhs.a, lhs.b) < std::tie(rhs.a, rhs.b);
    }
};
size_t f(){
srand((unsigned)time(NULL));
map<K, double> m;
for (int i = 0; i != 10000; i++){
string a = to_string(boost::uuids::random_generator()());
double b = rand() / (double)RAND_MAX;
m[K(a, b)] = rand() / (double)RAND_MAX;
}
return m.size();
}
int main(){
    // '\n' rather than std::endl: same output, no forced flush (the stream
    // is flushed automatically at normal program exit anyway).
    cout << f() << '\n';
    return 0;
}