If you don’t actually need to pair up elements and fill in dummy values for the shorter iterable, then of course itertools.zip_longest will add overhead - because it does those things, and then the loop over that iterable has to unpack the pairs.
But if you do need those things, itertools.zip_longest will be much faster - and easier - than doing it yourself.
from itertools import repeat, zip_longest
def test_separate_iterations():
    """Baseline: iterate each range independently, with no pairing at all."""
    for _ in range(1000000):
        pass
    for _ in range(100000):
        pass
def test_with_zip_longest():
    """Pair the two ranges with itertools.zip_longest (shorter side padded with None)."""
    for left, right in zip_longest(range(1000000), range(100000)):
        pass
def test_manual():
    """Rebuild zip_longest's output by hand: zip the overlap, then pad the tail."""
    # Matching prefix: both iterables contribute their first 100000 elements.
    for left, right in zip(range(100000), range(100000)):
        pass
    # Remaining 900000 elements of the longer iterable, paired with None.
    for left, right in zip(range(100000, 1000000), repeat(None, 900000)):
        pass
And if we “manually” do the work of the built-in zip and simulate itertools.repeat ourselves as well, it will get even worse:
def simulate_zip_longest(i1, i2):
    """Pure-Python equivalent of itertools.zip_longest(i1, i2) with fillvalue=None.

    Yields 2-tuples pairing elements of *i1* and *i2*; once the shorter
    input is exhausted, its slot is filled with None until the longer
    input is also exhausted.
    """
    i1, i2 = iter(i1), iter(i2)
    while True:
        try:
            e1 = next(i1)
        except StopIteration:
            # ran out of elements from i1; output from i2 and exit
            for e in i2:
                yield (None, e)
            return
        try:
            e2 = next(i2)
        except StopIteration:
            # ran out of elements from i2; output from i1 and exit.
            # BUG FIX: e1 was already pulled from i1 above and must not be
            # dropped - emit it before draining the rest of i1.
            yield (e1, None)
            for e in i1:
                yield (e, None)
            return
        # Otherwise we have an element from both iterators still
        yield (e1, e2)
def test_with_simulated_zip_longest():
    """Benchmark the hand-rolled zip_longest replacement on the same inputs."""
    for left, right in simulate_zip_longest(range(1000000), range(100000)):
        pass
My results look like:
>>> timeit(test_separate_iterations, number=100)
1.9616155847907066
>>> timeit(test_with_zip_longest, number=100)
3.220167404972017
>>> timeit(test_manual, number=100)
3.2905724085867405
>>> timeit(test_with_simulated_zip_longest, number=100)
7.614517252892256