Is `heapq.merge` stable?

I forgot a rule: should (must?) be stable, in both ways Tim mentioned.

Ok, here are my contenders.

merge_nested_states(xs, ys) is always in one of two states: Either it holds the current x and yields smaller ys through, or it holds the current y and yields smaller (or equal) xs through. Whenever one y (or x) isn’t smaller, go to the other state. Since Python doesn’t have a “goto”, I nested the two states so I run into and drop out of the second state.

merge_buffered(xs, ys) has a buffer of the next 1000 values for each input iterator. Always check which of the two buffers ends with the smaller value, merge it with the appropriate prefix of the other buffer (using list.sort), yield that merged chunk, and refill the buffers.

Benchmarks later…

Code
def merge_nested_states(xs, ys):
    xs = iter(xs)
    for x in xs:
        break
    else:
        yield from ys
        return
    ys = iter(ys)

    # Holding x
    for y in ys:
        if y < x:
            yield y
            continue
        yield x

        # Holding y
        for x in xs:
            if y < x:
                yield y
                break
            yield x

        # Ran out of xs
        else:
            yield y
            yield from ys
            return

    # Ran out of ys
    yield x
    yield from xs


from itertools import islice, chain
from bisect import bisect_left, bisect_right

def merge_buffered(xs, ys):
  xit = iter(xs)
  yit = iter(ys)
  k = 1000
  def chunks():
    xs = list(islice(xit, k))
    ys = list(islice(yit, k))
    while xs and ys:
      if ys[-1] < xs[-1]:
        i = bisect_right(xs, ys[-1])
        ys[:0] = xs[:i]
        del xs[:i]
        ys.sort()
        yield ys
        xs += islice(xit, i)
        ys[:] = islice(yit, k)
      else:
        i = bisect_left(ys, xs[-1])
        xs += ys[:i]
        del ys[:i]
        xs.sort()
        yield xs
        xs[:] = islice(xit, k)
        ys += islice(yit, i)
    yield xs or ys
    yield xit
    yield yit
  return chain.from_iterable(chunks())


funcs = [
    merge_nested_states,
    merge_buffered,
]

import random
import operator

# Testing, including stabilities
for merge in funcs * 3:
    for k in 10**3, 10**4, 10**5:
        xs = [[x] for x in sorted(random.choices(range(10**4), k=k))]
        ys = [[y] for y in sorted(random.choices(range(10**4), k=k))]
        expect = sorted(xs + ys)
        result = list(merge(iter(xs), iter(ys)))
        if result != expect:
            print(merge.__name__, 'failed miserably', len(expect), len(result))
        elif not all(map(operator.is_, result, expect)):
            print(merge.__name__, "isn't stable")

print('done')

Attempt This Online!

2 Likes