# Is N-dimensional generator possible?

Hi there,

I have a generator function that generates a combination of tuples from a list of generator and argument pairs:

```
def gen_from(alist):
    def _gen(item):
        f, *v = item
        return f(*v)

    n = len(alist)
    if n == 1:
        yield from (x for x in _gen(alist[0]))
    if n == 2:
        yield from ((x, y) for x in _gen(alist[0])
                           for y in _gen(alist[1]))
    if n == 3:
        yield from ((x, y, z) for x in _gen(alist[0])
                              for y in _gen(alist[1])
                              for z in _gen(alist[2]))
    ...

if __name__ == "__main__":
    def custom_range(n):
        for x in range(n):
            print(f">>> {x=}/{n}")
            yield x

    alist = [  # list of generators and args
        (range, 2),
        (custom_range, 2),
    ]
    for p in gen_from(alist):
        print(p)
```

which outputs:

```
>>> x=0/2
(0, 0)
>>> x=1/2
(0, 1)
>>> x=0/2
(1, 0)
>>> x=1/2
(1, 1)
```

Is it possible to generalize it to an n-dimensional generator?
I cannot use `itertools.product` because the generators need to be re-created for each loop.

Something like this?

```
def nn(*args):
    yield from zip(*args)
```

It works with lists or tuples as arguments, and with generators.

```
>>> list(nn([1,2,3], [4,5,6], [7,8,9]))
[(1, 4, 7), (2, 5, 8), (3, 6, 9)]
>>> list(nn(range(0, 4, 1), range(0, 8, 2), range(0, 12, 3)))
[(0, 0, 0), (1, 2, 3), (2, 4, 6), (3, 6, 9)]
```

To be clear, the problem is that you want the innermost generator to be "restarted" each time, rather than reusing the values that were calculated the first time? Instead of the `itertools.product` result like so?

```
>>> for p in itertools.product(range(2), custom_range(2)): print(p)
...
>>> x=0/2
>>> x=1/2
(0, 0)
(0, 1)
(1, 0)
(1, 1)
```

No, what I want is all combinations of the given lists.

Yes, that's exactly the problem I ran into.
Thank you both!

I can't think of a way to adapt `itertools.product` for this special need, or of any other tools that are useful here.

But the brute-force, general-purpose answer to "how can I have an arbitrary number of loops?" is to recurse on the number of loops.

In your case that means making a recursive generator, and recursing on `alist` to handle one element at a time. Something like:

```
def _gen(item):
    f, *v = item
    return f(*v)

def gen_from(alist):
    try:
        first, *rest = alist
    except ValueError:  # not enough values to unpack
        yield ()  # base case for zero input generators
    else:
        yield from (
            (x, *y)
            for x in _gen(first)
            for y in gen_from(rest)
        )
```
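As a quick sanity check, this can be run with a made-up `counting_range` helper (hypothetical, not from the thread) that records how many times a fresh inner generator is created. It confirms that the recursive version yields every combination and really does recreate the inner generator for each outer value:

```python
# `counting_range` is a made-up helper: it counts how many times a
# fresh inner generator is created (i.e. how often setup would re-run).
calls = []

def counting_range(n):
    calls.append(1)  # runs once per created-and-iterated generator
    yield from range(n)

def _gen(item):
    f, *v = item
    return f(*v)

def gen_from(alist):
    try:
        first, *rest = alist
    except ValueError:  # not enough values to unpack
        yield ()  # base case for zero input generators
    else:
        yield from ((x, *y) for x in _gen(first) for y in gen_from(rest))

pairs = list(gen_from([(range, 2), (counting_range, 3)]))
print(pairs)       # [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
print(len(calls))  # 2: the inner generator was rebuilt for each outer value
```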

This is truly the solution I wanted, so cool! Thank you very much!!
I couldn't think of a way to express it as `(x, *y)` in a recursive way.

Why do you want that? What is the actual problem with `itertools.product`?

As explained upthread, while computing the Cartesian product, OP needs the inner generators to be re-created and re-evaluated for each value of the outer generators. Ordinarily that would be wasteful, and `itertools.product` won't do it.

I think the question is why that is desired. One possible reason is if one or more of the generators produce very large elements that cannot all fit in memory. Recreating the objects might use more IO or CPU but conserve memory.

The generators are called from a thread.
Therefore, I want to check the event flag at every step to see if the caller wants to interrupt it.
Each loop also needs to communicate with some equipment, such as changing the camera's exposure time.

In that case, if that's the only issue to resolve… wouldn't it work to just check the event flag inside the loop that iterates over the product?

Hi Karl,

You would be right if I only wanted to check the event flag, like this:

```
import itertools

def custom_range(n):
    for x in range(n):
        print(f">>> {x=}/{n}")  # communication to device
        yield x

def run():
    for p in itertools.product(range(2), custom_range(2)):
        # Check for interruptions
        # do some heavy process...
        print(p)
```

But I also want to include device setup in the loop, so the generators must be recreated each time rather than reusing the values calculated the first time.
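A minimal sketch of that use case (the names `stop_event` and `exposure_steps` are made up here, not from the thread): the per-loop device setup lives at the top of the generator body, so it runs again every time the generator is recreated, and the interrupt flag is checked on every step:

```python
import threading

stop_event = threading.Event()  # set by the calling thread to interrupt the scan

def exposure_steps(n):
    # device setup would go here; it re-runs each time the
    # generator is recreated for a new pass of the outer loop
    for x in range(n):
        if stop_event.is_set():
            return  # abort cleanly if the caller requested a stop
        yield x

print(list(exposure_steps(3)))  # [0, 1, 2]
stop_event.set()
print(list(exposure_steps(3)))  # []: the flag stops it before the first yield
```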

Two more ways, inspired by itertools.product's "roughly equivalent code":

```
def gen_from(factors):
    product = iter([()])
    for factor in factors:
        product = (
            p + (x,)
            for product, (f, *args) in [(product, factor)]
            for p in product
            for x in f(*args)
        )
    return product
```
```
from functools import reduce
from operator import call

def gen_from(factors):
    return reduce(
        lambda product, factor: (
            p + (x,)
            for p in product
            for x in call(*factor)
        ),
        factors,
        iter([()])
    )
```

An itertools contraption for the 2-dimensional case (trying to do something like this for N dimensions…):

```
from itertools import repeat, starmap, chain
from operator import call

def gen_from(factors):
    return chain.from_iterable(map(
        zip,
        map(repeat, call(*factors[0])),
        starmap(call, repeat(factors[1]))
    ))
```

Another, keeping a list of iterators and a list of values, always fetching the next rightmost value and resetting the iterators to its right (I don't like it, but had to try it):

```
from operator import call

def gen_from(factors):
    iters = [iter(f(*args)) for f, *args in factors]
    vals = list(map(next, iters))
    while True:
        yield tuple(vals)
        for i, it in enumerate(reversed(iters)):
            for vals[~i] in it:
                break
            else:
                continue
            break
        else:
            break
        while i:
            i -= 1
            iters[~i] = iter(call(*factors[~i]))
            vals[~i] = next(iters[~i])
```

Hi Stefan,

I ended up with a modified version of @kknechtel's code:

```
def gen_from(alist):
    if alist:
        (f, *v), *rest = alist
        yield from ((x, *y) for x in f(*v) for y in gen_from(rest))
    else:
        # Check sentinel
        yield ()
```

But I like your code too. It looks like Lisp:

```
(gen_from := lambda factors:
    (chain.from_iterable
        (map (zip, map(repeat, call(*factors[0])),
                   starmap(call, repeat(factors[1]))))))
```

I might remove the extra generator wrappers, i.e., not do `yield from genexp`. It's a bit faster.

A benchmark with input `alist = [(range, 300)] * 2`:

```
  1.70 ± 0.01 ms  Stefan3
  8.52 ± 0.12 ms  Stefan2
  8.52 ± 0.18 ms  Stefan1
 33.01 ± 0.35 ms  Stefan4
 40.47 ± 0.44 ms  Karlish_no_wrap
 48.25 ± 0.36 ms  Karlish
121.88 ± 0.63 ms  Karl

Python: 3.11.4 (main, Sep  9 2023, 15:09:21) [GCC 13.2.1 20230801]
```

`Stefan3` is the 2D one, so it can't participate with input `alist = [(range, 10)] * 5`:

```
 13.35 ± 0.11 ms  Stefan2
 13.37 ± 0.10 ms  Stefan1
 46.54 ± 0.66 ms  Stefan4
102.34 ± 2.39 ms  Karlish_no_wrap
138.21 ± 1.05 ms  Karlish
233.47 ± 1.98 ms  Karl

Python: 3.11.4 (main, Sep  9 2023, 15:09:21) [GCC 13.2.1 20230801]
```
Benchmark script
```
from timeit import timeit
from statistics import mean, stdev
from operator import truth, not_
from collections import deque
from itertools import repeat
import sys

def Stefan1(factors):
    product = iter([()])
    for factor in factors:
        product = (
            p + (x,)
            for product, (f, *args) in [(product, factor)]
            for p in product
            for x in f(*args)
        )
    return product

from functools import reduce
from operator import call

def Stefan2(factors):
    return reduce(
        lambda product, factor: (
            p + (x,)
            for p in product
            for x in call(*factor)
        ),
        factors,
        iter([()])
    )

from itertools import repeat, starmap, chain
from operator import call

def Stefan3(factors):
    return chain.from_iterable(map(
        zip,
        map(repeat, call(*factors[0])),
        starmap(call, repeat(factors[1]))
    ))

def Stefan4(factors):
    iters = [iter(f(*args)) for f, *args in factors]
    vals = list(map(next, iters))
    while True:
        yield tuple(vals)
        for i, it in enumerate(reversed(iters)):
            for vals[~i] in it:
                break
            else:
                continue
            break
        else:
            break
        while i:
            i -= 1
            iters[~i] = iter(call(*factors[~i]))
            vals[~i] = next(iters[~i])

def _gen(item):
    f, *v = item
    return f(*v)

def Karl(alist):
    try:
        first, *rest = alist
    except ValueError:  # not enough values to unpack
        yield ()  # base case for zero input generators
    else:
        yield from (
            (x, *y)
            for x in _gen(first)
            for y in Karl(rest)
        )

def Karlish(alist):
    if alist:
        (f, *v), *rest = alist
        yield from ((x, *y) for x in f(*v) for y in Karlish(rest))
    else:
        # Check sentinel
        yield ()

def Karlish_no_wrap(alist):
    if alist:
        (f, *v), *rest = alist
        for x in f(*v):
            for y in Karlish_no_wrap(rest):
                yield (x, *y)
    else:
        # Check sentinel
        yield ()

funcs = Stefan1, Stefan2, Stefan3, Stefan4, Karl, Karlish, Karlish_no_wrap
alist = [(range, 300)] * 2

funcs = Stefan1, Stefan2, Stefan4, Karl, Karlish, Karlish_no_wrap
alist = [(range, 10)] * 5

times = {f: [] for f in funcs}
def stats(f):
    ts = [t * 1e3 for t in sorted(times[f])[:5]]
    return f'{mean(ts):6.2f} ± {stdev(ts):4.2f} ms '
for _ in range(25):
    for f in funcs:
        t = timeit(lambda: deque(f(alist), 0), number=1)
        times[f].append(t)
for f in sorted(funcs, key=stats):
    print(stats(f), f.__name__)

print('\nPython:', sys.version)
```

Attempt This Online!

Just to be clear, I am not at all surprised by the poor performance of my version.