Testing @rhettinger’s version with lists against mine with iterators, on an iterator of 10,000 values, each 1 MB large, with batch size n=1000:
 18.82 seconds batched_as_lists
  0.47 seconds batched_as_iterators
2000.1 MB peak batched_as_lists
   2.0 MB peak batched_as_iterators

(The list version peaks at about two batches' worth of memory: the benchmark loop still holds the previous batch while the generator builds the next one.)
Code
import tracemalloc
from timeit import default_timer as time
from itertools import islice, chain

def big_objects():
    # Lazily produce 10,000 strings of 1 MB each.
    for _ in range(10**4):
        yield '.' * 10**6

def batched_as_lists(iterable, n):
    # @rhettinger's version: materialize each batch as a list of up to n items.
    it = iter(iterable)
    while (batch := list(islice(it, n))):
        yield batch

def batched_as_iterators(iterable, n):
    # My version: yield each batch as a lazy iterator of up to n items.
    it = iter(iterable)
    for first in it:
        batch = chain((first,), islice(it, n-1))
        yield batch
        # Consume whatever the caller left of the batch, so the next
        # batch starts at the right place in the underlying iterator.
        next(islice(batch, n, n), None)

funcs = batched_as_lists, batched_as_iterators

# Small demo for correctness
for f in funcs:
    print(*map(list, f(range(10), 4)))
    # -> [0, 1, 2, 3] [4, 5, 6, 7] [8, 9]

# Speed
for f in funcs:
    t = time()
    for _ in f(big_objects(), 1000):
        pass
    print(f'{time()-t:6.2f} seconds {f.__name__}')

# Memory
for f in funcs:
    tracemalloc.start()
    for _ in f(big_objects(), 1000):
        pass
    print(f'{tracemalloc.get_traced_memory()[1] / 1e6:6.1f} MB peak {f.__name__}')
    tracemalloc.stop()
    del _  # Drop the last batch so it doesn't linger into the next run.
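
One caveat the benchmark doesn't exercise: batched_as_iterators hands out views over a single shared iterator, so each batch must be consumed before the next one is requested. A minimal sketch of the failure mode, reusing batched_as_iterators from above:

# Advancing the generator drains each batch, so by the time list()
# returns, every stored batch is already empty.
batches = list(batched_as_iterators(range(10), 4))
print([list(b) for b in batches])  # prints [[], [], []]

batched_as_lists has no such constraint; that flexibility is what the list version's extra time and memory buy.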