I forgot a rule: should (must?) be stable, in both ways Tim mentioned.
Ok, here are my contenders.
merge_nested_states(xs, ys)
is always in one of two states: Either it holds the current x and yields smaller ys through, or it holds the current y and yields smaller (or equal) xs through. Whenever one y (or x) isn’t smaller, go to the other state. Since Python doesn’t have a “goto”, I nested the two states so I run into and drop out of the second state.
merge_buffered(xs, ys)
has a buffer of the next 1000 values for each input iterator. Always check which of the two buffers ends with the smaller value, merge it with the appropriate prefix of the other buffer (using list.sort
), yield that merged chunk, and refill the buffers.
Benchmarks later…
Code
def merge_nested_states(xs, ys):
xs = iter(xs)
for x in xs:
break
else:
yield from ys
return
ys = iter(ys)
# Holding x
for y in ys:
if y < x:
yield y
continue
yield x
# Holding y
for x in xs:
if y < x:
yield y
break
yield x
# Ran out of xs
else:
yield y
yield from ys
return
# Ran out of ys
yield x
yield from xs
from itertools import islice, chain
from bisect import bisect_left, bisect_right
def merge_buffered(xs, ys):
xit = iter(xs)
yit = iter(ys)
k = 1000
def chunks():
xs = list(islice(xit, k))
ys = list(islice(yit, k))
while xs and ys:
if ys[-1] < xs[-1]:
i = bisect_right(xs, ys[-1])
ys[:0] = xs[:i]
del xs[:i]
ys.sort()
yield ys
xs += islice(xit, i)
ys[:] = islice(yit, k)
else:
i = bisect_left(ys, xs[-1])
xs += ys[:i]
del ys[:i]
xs.sort()
yield xs
xs[:] = islice(xit, k)
ys += islice(yit, i)
yield xs or ys
yield xit
yield yit
return chain.from_iterable(chunks())
funcs = [
merge_nested_states,
merge_buffered,
]
import random
import operator
# Testing, including stabilities
for merge in funcs * 3:
for k in 10**3, 10**4, 10**5:
xs = [[x] for x in sorted(random.choices(range(10**4), k=k))]
ys = [[y] for y in sorted(random.choices(range(10**4), k=k))]
expect = sorted(xs + ys)
result = list(merge(iter(xs), iter(ys)))
if result != expect:
print(merge.__name__, 'failed miserably', len(expect), len(result))
elif not all(map(operator.is_, result, expect)):
print(merge.__name__, "isn't stable")
print('done')