Consuming object exposed via buffer protocol

I am learning about the buffer protocol (and the C API) and, as an exercise, trying to consume data exposed through the buffer protocol from a module named consumer. To this end, I have defined the consumer module, a PyConsumerArray type to represent the consumed array, and a consume method that should read the buffer exposed by array.array and map it to a PyConsumerArray. All of this lives in consumer.c, which looks like this:

// consumer.c
// Based on http://jakevdp.github.io/blog/2014/05/05/introduction-to-the-python-buffer-protocol/
#include <Python.h>

/*
 * ConsumerArray: a minimal heap-backed array of ints.
 *
 *   arr    - storage for `length` ints; NULL when the array is empty
 *   length - number of valid elements in `arr`
 *
 * The helpers below keep the invariant "arr == NULL implies length == 0",
 * so an empty or deallocated array can always be printed safely.
 */
typedef struct {
	int *arr;
	long length;
} ConsumerArray;

/*
 * Allocate storage for `length` ints and fill it with 0, 1, ..., length-1.
 *
 * A non-positive length, a length whose byte size would overflow size_t,
 * or an allocation failure leaves the array empty (arr == NULL,
 * length == 0). Callers that asked for length > 0 can detect failure by
 * checking a->arr == NULL afterwards.
 *
 * Any storage previously referenced by `a` is NOT released here; call
 * deallocate_ConsumerArray first when re-initializing.
 */
void initialize_ConsumerArray(ConsumerArray *a, long length) {
	a->arr = NULL;
	a->length = 0;

	if (length <= 0)
		return;
	/* Refuse sizes whose byte count would wrap around. */
	if ((size_t)length > (size_t)-1 / sizeof *a->arr)
		return;

	a->arr = malloc(sizeof *a->arr * (size_t)length);
	if (a->arr == NULL)
		return;

	a->length = length;
	/* Index with the same type as `length` so large arrays do not overflow an int counter. */
	for (long i = 0; i < length; i++)
		a->arr[i] = (int)i;
}

/*
 * Release the storage owned by `a` and reset it to the empty state.
 * Safe to call on an already-empty array (free(NULL) is a no-op).
 * The storage must have been obtained from malloc.
 */
void deallocate_ConsumerArray(ConsumerArray *a) {
	free(a->arr);
	a->arr = NULL;
	a->length = 0;
}

/*
 * Render at most `nmax` leading elements as "[ e0 e1 ... ]".
 * "... " is appended when elements were left out.
 *
 * Returns a malloc'd NUL-terminated string that the caller must free(),
 * or NULL if the buffer could not be allocated.
 */
char* stringify_ConsumerArray(ConsumerArray *a, int nmax) {
	/* Treat inconsistent state (no storage) as an empty array. */
	long count = (a->arr != NULL && a->length > 0) ? a->length : 0;
	long shown = count;
	if (shown > nmax)
		shown = nmax;
	if (shown < 0)
		shown = 0;

	/*
	 * Upper bound per element: decimal digits of an int, a sign and the
	 * trailing space (12 characters for a 32-bit int; 3*sizeof+2 = 14).
	 * Fixed overhead: "[ " + "... " + "]" + NUL = 8.
	 */
	size_t elem_width = 3 * sizeof(int) + 2;
	size_t overhead = 8;
	if ((size_t)shown > ((size_t)-1 - overhead) / elem_width)
		return NULL;
	size_t cap = (size_t)shown * elem_width + overhead;

	char *s = malloc(cap);
	if (s == NULL)
		return NULL;

	/* cap is sufficient by construction, so pos never exceeds it. */
	size_t pos = 0;
	pos += (size_t)snprintf(s + pos, cap - pos, "[ ");
	for (long i = 0; i < shown; i++)
		pos += (size_t)snprintf(s + pos, cap - pos, "%d ", a->arr[i]);
	if (shown < count)
		pos += (size_t)snprintf(s + pos, cap - pos, "... ");
	snprintf(s + pos, cap - pos, "]");
	return s;
}

/*
 * Print the array to stdout in the stringify_ConsumerArray format.
 * Prints nothing if the string could not be built.
 */
void print_ConsumerArray(ConsumerArray *a, int nmax) {
	char *string = stringify_ConsumerArray(a, nmax);
	if (string == NULL)
		return;
	printf("%s", string);
	/* The string is owned by us; release it to avoid leaking on every call. */
	free(string);
}

/* Python-level object: a standard object header followed by the wrapped array. */
typedef struct {
	PyObject_HEAD
	ConsumerArray arr;	/* embedded by value: pointer to the ints plus their count */
} PyConsumerArray;

/*
 * tp_dealloc slot for PyConsumerArray.
 *
 * Releases the wrapped array's storage through deallocate_ConsumerArray
 * and then hands the object's own memory back via the type's tp_free.
 */
static void
PyConsumerArray_dealloc(PyConsumerArray* self) {
	PyTypeObject *type = Py_TYPE(self);
	ConsumerArray *storage = &self->arr;

	deallocate_ConsumerArray(storage);
	type->tp_free((PyObject*)self);
}

/*
 * tp_init slot: PyConsumerArray(length=0).
 *
 * Replaces the wrapped array with a fresh one of `length` elements
 * (negative lengths are clamped to 0).
 *
 * Returns 0 on success. Returns -1 with an exception set when the
 * arguments are invalid (the existing contents are left untouched) or
 * when the storage cannot be allocated (MemoryError).
 */
static int
PyConsumerArray_init(PyConsumerArray *self, PyObject *args, PyObject *kwds) {
	int length = 0;
	static char *kwlist[] = {"length", NULL};

	/*
	 * Validate the arguments BEFORE discarding the current storage, so a
	 * failed re-initialization does not leave the object half torn down.
	 */
	if(!PyArg_ParseTupleAndKeywords(args, kwds, "|i", kwlist, &length)) {
		return -1;
	}

	if (length < 0)
		length = 0;

	/* __init__ may be called again on a live object: drop the old storage first. */
	deallocate_ConsumerArray(&self->arr);

	initialize_ConsumerArray(&self->arr, length);
	if (length > 0 && self->arr.arr == NULL) {
		/* Allocation failed: keep the object in a consistent empty state. */
		self->arr.length = 0;
		PyErr_NoMemory();
		return -1;
	}
	return 0;
}

/*
 * tp_str / tp_repr slot: text form of the first 10 elements, as produced
 * by stringify_ConsumerArray.
 *
 * Returns a new str object, or NULL with an exception set on failure.
 */
static PyObject *
PyConsumerArray_str(PyConsumerArray *self) {
	char *str = stringify_ConsumerArray(&self->arr, 10);
	/* Never hand a NULL C string to PyUnicode_FromString. */
	if (str == NULL)
		return PyErr_NoMemory();

	PyObject *ret = PyUnicode_FromString(str);
	/* The C string is ours; the str object holds its own copy. */
	free(str);
	return ret;
}

/*
 * Type object for consumer.PyConsumerArray.
 * Wires the slot functions defined above into the Python type.
 */
static PyTypeObject PyConsumerArrayType = {
    /* No comma after the macro: this follows the CPython extension tutorial, where the macro supplies it. */
    .ob_base = PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "consumer.PyConsumerArray",
    .tp_doc = PyDoc_STR("Consumer Array objects"),
    .tp_basicsize = sizeof(PyConsumerArray),
    .tp_flags = Py_TPFLAGS_DEFAULT,
    /* Generic allocator; per-instance setup is done by tp_init below. */
    .tp_new = PyType_GenericNew,
    .tp_dealloc = (destructor) PyConsumerArray_dealloc,
    /* repr() and str() share one implementation. */
    .tp_repr = (reprfunc) PyConsumerArray_str,
    .tp_str = (reprfunc) PyConsumerArray_str,
    .tp_init = (initproc) PyConsumerArray_init,		
};

// The consume method: consume an array through the buffer protocol.
//
// Intended behaviour: obtain the buffer exported by the caller's object and
// return it wrapped as a PyConsumerArray.
//
// NOTE(review): this first attempt is the subject of the question and is
// kept as posted. Problems visible in the code are flagged inline.
static PyObject *
consumer_consume(PyObject *obj, PyObject *args) {
	
	// The below raises segmentation fault. Why?
	// if (!PyObject_CheckBuffer(args)) {
	// 	PyErr_SetString(PyExc_ValueError, "The object does not support buffer protocol.");
	// }

	Py_buffer view;
	// NOTE(review): the method is registered as METH_VARARGS, so `args` is the
	// tuple of positional arguments, not the object the caller passed.
	if (PyObject_GetBuffer(args, &view, PyBUF_SIMPLE) < 0) {
		// NOTE(review): this overwrites the exception already set by
		// PyObject_GetBuffer with a less specific one.
		PyErr_SetString(PyExc_ValueError, "Get buffer failed");
		return NULL;
	}
	
 	// NOTE(review): `ca` is a local variable; its object header is never
 	// initialized and its storage ends when this function returns.
 	PyConsumerArray ca;
	ca.arr.arr = view.buf;
	// NOTE(review): `shape` is a pointer member of Py_buffer, so this stores
	// an address converted to long, not an element count.
	ca.arr.length = (long)view.shape;
	
	// Release the buffer.
	// NOTE(review): ca.arr.arr still holds view.buf after this release.
	PyBuffer_Release(&view);
	// NOTE(review): returns the address of the local `ca` declared above.
	PyObject *ret = (PyObject*)&ca;
	return ret;
}

/* Module-level function table for the consumer module. */
static PyMethodDef ConsumerMethods[] = {
	/* METH_VARARGS: consumer_consume receives its positional arguments packed in a tuple. */
	{"consume", consumer_consume, METH_VARARGS, "consumer method"},
	/* Sentinel entry terminating the table. */
	{NULL, NULL, 0, NULL}
};

/* Module definition consumed by PyModule_Create in PyInit_consumer. */
static struct PyModuleDef consumer_module = {
	PyModuleDef_HEAD_INIT,
	.m_name = "consumer",
	.m_doc = "consumer module", /* module documentation */
	/* -1: no per-module state structure is allocated (see the PyModuleDef documentation). */
	.m_size = -1,
	.m_methods = ConsumerMethods,
};

/*
 * Module initialization function, called when `consumer` is imported.
 *
 * Finalizes the PyConsumerArray type, creates the module and publishes
 * the type as consumer.PyConsumerArray.
 *
 * Returns the new module, or NULL with an exception set on failure.
 */
PyMODINIT_FUNC
PyInit_consumer(void) {
	PyObject *m;

	if(PyType_Ready(&PyConsumerArrayType) < 0){
		return NULL;
	}

	m = PyModule_Create(&consumer_module);
	if(m == NULL)
		return NULL;

	// PyModule_AddObject steals a reference to the value on success, so the
	// module must be handed a reference of its own: take one here. On
	// failure nothing is stolen and we must release that reference ourselves.
	Py_INCREF(&PyConsumerArrayType);
	if(PyModule_AddObject(m, "PyConsumerArray", (PyObject*)&PyConsumerArrayType) < 0) {
		Py_DECREF(&PyConsumerArrayType);
		// `m` is already a PyObject *; pass the object, not the address of the variable.
		Py_DECREF(m);
		return NULL;
	}

	return m;
}

I built the above code via a setup.py as:

# setup.py -- builds the `consumer` extension module from consumer.c.
# NOTE: distutils is deprecated (PEP 632) and no longer ships with Python 3.12+.
from distutils.core import Extension, setup

consumer_extension = Extension("consumer", sources=["consumer.c"])

setup(
    name="consumer",
    ext_modules=[consumer_extension],
)

and tested it as:

import array
arr = array.array('i', [1, 2, 3, 4, 5])  # buffer exporter; typecode 'i' is a signed C int

import consumer as C
array_consumed = C.consume(arr)  # pass the exporter to the C extension
print (array_consumed)  # prints str() of whatever consume() returned

But when I ran the test, I got this error:

Traceback (most recent call last):
  File "/Users/arun/code/py/buffer-protocol/test2.py", line 5, in <module>
    array_consumed = C.consume(arr)
ValueError: Get buffer failed

I am not sure what mistake I am making. Any help would be useful.

One thing to start with debugging is to change this code:

       if (PyObject_GetBuffer(args, &view, PyBUF_SIMPLE) < 0) {
		PyErr_SetString(PyExc_ValueError, "Get buffer failed");
		return NULL;
	}

This replaces the exception raised by PyObject_GetBuffer by a ValueError. I’d just remove PyErr_SetString, which should give a clearer indication of what’s going wrong.

Something else: consumer_consume keeps hold of the buffer pointer and length from the Py_buffer after releasing it. That’s against the API contract: the buffer pointer can change once you release the view (e.g. when an array.array instance is resized).

First, I removed the PyErr_SetString and when I did that, I got a TypeError: a bytes-like object is required, not 'tuple'. Hence, I updated the consumer_consume method to parse a tuple and then read the buffer as follows:

// Second attempt at consume: unpacks the argument tuple before asking the
// object for its buffer.
//
// NOTE(review): kept as posted, because the segmentation fault described
// right after it depends on this exact code. Remaining problems are
// flagged inline.
static PyObject *
consumer_consume(PyObject *obj, PyObject *args) {
  
  PyObject *buffer_obj;
  Py_buffer view;
  // "O" stores a borrowed reference to the single positional argument.
  if (!PyArg_ParseTuple(args, "O", &buffer_obj)) {
    return NULL;
  }

  // On failure the exception set by PyObject_GetBuffer is propagated as is.
  if (PyObject_GetBuffer(buffer_obj, &view, PyBUF_SIMPLE) < 0) {
    return NULL;
  }
  
  // NOTE(review): `ca` is a local variable; its object header is never
  // initialized and its storage ends when this function returns.
  PyConsumerArray ca;
  ca.arr.arr = view.buf;
  // NOTE(review): `shape` is a pointer member of Py_buffer, so this stores
  // an address converted to long, not an element count.
  ca.arr.length = (long)view.shape;
  
  // NOTE(review): ca.arr.arr still holds view.buf after this release.
  PyBuffer_Release(&view);
  // NOTE(review): returns the address of the local `ca` declared above.
  PyObject *ret = (PyObject*)&ca;
  return ret;
}

but this throws a segmentation fault.

From my understanding, I am reading the array.array as a PyObject at the memory location pointed by buffer_obj by calling PyArg_ParseTuple(args, "O", &buffer_obj). Next, I am using the PyObject_GetBuffer method to read the buffer exposed for the object at the memory buffer_obj into the buffer view. Am I missing something here?

This code is an incorrect use of the buffer protocol: you’re effectively using view.buf after calling PyBuffer_Release.

Other than that you’ll have to use a debugger to get a backtrace for the segmentation fault.

1 Like

This code is an incorrect use of the buffer protocol: you’re effectively using view.buf after calling PyBuffer_Release.

If I understand correctly, this means that I should not call PyBuffer_Release(&view), since the buffer is still in use by PyConsumerArray, right?

I also made a mistake in the allocation above: I did not allocate the object. To fix it, I added a PyObject_New call to the function and updated it as follows:

// Third attempt at consume: allocates a real PyConsumerArray object on the
// heap and points it at the exporter's buffer.
//
// NOTE(review): kept as posted, because the malloc error described right
// after it depends on this exact code. Remaining problems are flagged inline.
static PyObject *
consumer_consume(PyObject *obj, PyObject *args) {
  PyObject *buffer_obj;
  Py_buffer view;
  // "O" stores a borrowed reference to the single positional argument.
  if (!PyArg_ParseTuple(args, "O", &buffer_obj)) {
    return NULL;
  }

  if (!PyObject_CheckBuffer(buffer_obj)) {
    PyErr_SetString(PyExc_ValueError, "The object does not support buffer protocol.");
    return NULL;
  }

  if (PyObject_GetBuffer(buffer_obj, &view, PyBUF_SIMPLE) < 0) {
    return NULL;
  }

  // PyObject_New allocates the object without running tp_new or tp_init;
  // the `arr` member is only valid once both fields are assigned below.
  PyConsumerArray *ca = PyObject_New(PyConsumerArray, (PyTypeObject *)&PyConsumerArrayType);
  if(ca == NULL) {
    PyBuffer_Release(&view);
    return NULL;
  }

  // NOTE(review): this memory belongs to the exporter, but
  // PyConsumerArray_dealloc later passes ca->arr.arr to free() through
  // deallocate_ConsumerArray.
  ca->arr.arr = view.buf;
  // The cast applies to view.len only; the result is bytes / bytes-per-item.
  // NOTE(review): the buffer documentation appears to say itemsize should be
  // disregarded for PyBUF_SIMPLE requests -- confirm. Nothing checks that
  // the items are C ints either.
  ca->arr.length = (long)view.len / view.itemsize;

  // NOTE(review): on this path `view` is never released, and the local
  // Py_buffer is gone after return, so it cannot be released later.
  return (PyObject *)ca;
}

The above prints the desired output but when python exits, it throws a malloc error:

Python(2174,0x1fc286080) malloc: *** error for object 0x1029e3670: pointer being freed was not allocated
Python(2174,0x1fc286080) malloc: *** set a breakpoint in malloc_error_break to debug

Is it that the PyConsumerArray pointer ca is not being allocated (even though PyObject_New was called for allocation)?

It was a naive error: I was calling free on an object that was not allocated using malloc, in the function deallocate_ConsumerArray.

Thanks for the help!