My first GSoC week at Gentoo Foundation

First, a brief note about the project I am working on – libebuild. The idea is to implement an efficient and well-tested C library that provides functionality for working with Gentoo ebuilds and is intended to be used in package managers.

Libebuild repo

The initial thought is to provide a pure C implementation with a well-defined API, so that it can also be used in external tool development. We might also provide bindings for various languages and use the library from, e.g., Python.

Here are some difficulties that I’ve faced while trying to add Python bindings on top of my C code.

Let’s say I have my C struct for working with category/package/version:


/* A category/package/version triple, one C string per component. */
typedef struct {
    char *cat;  /* category */
    char *pkg;  /* package */
    char *ver;  /* version */
} cpv;

and some C code for allocating memory and filling up this struct:


/*
 * Parse a "category/package-version" string into a newly allocated cpv.
 *
 * The struct and a private copy of the input share a single allocation: the
 * copy is stored directly after the struct, the first '/' and the last '-'
 * following it are overwritten with '\0', and cat/pkg/ver point into that
 * copy. A single free() of the returned pointer releases everything.
 *
 * cpv_string is only read, never modified.
 *
 * Returns NULL when cpv_string is NULL, contains no '/', contains no '-'
 * after the '/', or when the allocation fails.
 */
cpv *cpv_alloc(char *cpv_string) {
    cpv *c;
    char *slash;
    char *dash;
    size_t len;

    if (cpv_string == NULL) {
        return NULL;
    }

    len = strlen(cpv_string) + 1;
    c = malloc(sizeof *c + len);
    if (c == NULL) {
        return NULL;
    }

    /* The string copy lives in the tail of the same allocation. */
    c->cat = (char *)c + sizeof *c;
    memcpy(c->cat, cpv_string, len);

    /* Locate both separators before touching the buffer, so malformed
     * input is rejected instead of dereferencing a NULL pointer. */
    slash = strchr(c->cat, '/');
    dash = (slash != NULL) ? strrchr(slash + 1, '-') : NULL;
    if (dash == NULL) {
        free(c);
        return NULL;
    }

    *slash = '\0';
    *dash = '\0';
    c->pkg = slash + 1;
    c->ver = dash + 1;
    return c;
}

To define a new Python CPV type I have to implement a Python object type layout:


/*
 * Instance layout for the Python-visible CPV type: the PyObject_HEAD macro
 * followed by one C string per category/package/version component.
 */
typedef struct {
    PyObject_HEAD
    char *cat;  /* category */
    char *pkg;  /* package */
    char *ver;  /* version */
} cpv_CPVObject;

/*
 * Type object for "cpv.CPV". Only the name, the instance size
 * (sizeof(cpv_CPVObject)), the default flags and the doc string are filled
 * in; every other listed slot is 0, and the slots after tp_doc are left out
 * of the initializer altogether.
 */
static PyTypeObject cpv_CPVType = {
    PyObject_HEAD_INIT(NULL)
    0,                     /*ob_size*/
    "cpv.CPV",             /*tp_name*/
    sizeof(cpv_CPVObject), /*tp_basicsize*/
    0,                     /*tp_itemsize*/
    0,                     /*tp_dealloc*/
    0,                     /*tp_print*/
    0,                     /*tp_getattr*/
    0,                     /*tp_setattr*/
    0,                     /*tp_compare*/
    0,                     /*tp_repr*/
    0,                     /*tp_as_number*/
    0,                     /*tp_as_sequence*/
    0,                     /*tp_as_mapping*/
    0,                     /*tp_hash */
    0,                     /*tp_call*/
    0,                     /*tp_str*/
    0,                     /*tp_getattro*/
    0,                     /*tp_setattro*/
    0,                     /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT,    /*tp_flags*/
    "CPV objects",         /* tp_doc */
};

/* Method table; it holds nothing but the terminating sentinel entry. */
static PyMethodDef cpv_methods[] = {
    {NULL} /* Sentinel */
};

PyMODINIT_FUNC
initcpv(void)
{
    PyObject* m;

    cpv_Type.tp_new = PyType_GenericNew;
    if (PyType_Ready(cpv_CPVType) < 0)
        return;

    m = Py_InitModule3("cpv", cpv_methods,
        "Example module that creates an extension type.");

    Py_INCREF(&cpv_CPVType);
    PyModule_AddObject(m, "CPV", (PyObject *)&cpv_CPVType);
}

Now my struct has to include PyObject_HEAD, which basically provides the object type and reference-counting information. I also specify sizeof(cpv_CPVObject), so that Python knows how much memory to allocate when creating an instance.

So now it’s clear that I can’t just use my initial C struct to manage CPV objects in Python. One solution I see is to call cpv_alloc while executing tp_init, then copy the corresponding fields from struct cpv to struct cpv_CPVObject and free struct cpv, but that seems like too much of a hassle. Say we want to compare two atoms: on the Python side we receive two PyObject* values, which again have to be converted to pure C cpv structs to perform the actual operations.
Maybe it’s possible to somehow redefine tp_new or tp_alloc.

Alright, I’ve come up with something that seems like an appropriate solution: when we have to use the Python/C API, let Python do the allocation and just pass self to the cpv function, and also substitute the alloc family of functions with their Python-specific counterparts so that the Python private heap is used.


/*
 * When PYTHONC is defined, route malloc/realloc/free through the
 * corresponding PyMem_* functions; otherwise the names are left untouched.
 */
#ifdef PYTHONC
#define malloc  PyMem_Malloc
#define realloc PyMem_Realloc
#define free    PyMem_Free
#endif

/*
 * A category/package/version triple, one C string per component.
 * When PYTHONC is defined the struct additionally starts with PyObject_HEAD,
 * so the very same type serves as the layout of the Python object.
 */
typedef struct {
#ifdef PYTHONC
    PyObject_HEAD
#endif
    char *cat;  /* category */
    char *pkg;  /* package */
    char *ver;  /* version */
} cpv;

#include "cpv.h"

/*
 * Parse a "category/package-version" string into a cpv.
 *
 * Without PYTHONC: the struct and a private copy of the input share one
 * allocation (the copy sits right after the struct), so a single free() of
 * the returned pointer releases everything.
 *
 * With PYTHONC: the caller supplies the object in `self`; only the string
 * copy is allocated here and stored in self->cat, which then owns it.
 * NOTE(review): malloc/free are presumably the PyMem_* macros in that
 * build — confirm they are in scope where this file is compiled.
 *
 * In both builds the first '/' and the last '-' following it are replaced
 * by '\0' in the copy, and pkg/ver point into the same buffer as cat.
 * cpv_string itself is only read.
 *
 * Returns the filled-in cpv, or NULL when cpv_string (or self) is NULL, a
 * separator is missing, or the allocation fails. On failure nothing is
 * leaked and, with PYTHONC, the fields of `self` are left untouched.
 */
cpv *cpv_alloc(char *cpv_string
#ifdef PYTHONC
, cpv *self
#endif
) {
    char *buf;
    char *slash;
    char *dash;
    size_t len;
#ifdef PYTHONC
    cpv *c = self;
#else
    cpv *c;
#endif

    if (cpv_string == NULL) {
        return NULL;
    }
#ifdef PYTHONC
    if (c == NULL) {
        return NULL;
    }
#endif

    len = strlen(cpv_string) + 1;
#ifdef PYTHONC
    buf = malloc(len);
    if (buf == NULL) {
        return NULL;
    }
#else
    c = malloc(sizeof *c + len);
    if (c == NULL) {
        return NULL;
    }
    /* The string copy lives in the tail of the same allocation. */
    buf = (char *)c + sizeof *c;
#endif
    memcpy(buf, cpv_string, len);

    /* Locate both separators before storing anything, so malformed input
     * is rejected instead of dereferencing a NULL pointer. */
    slash = strchr(buf, '/');
    dash = (slash != NULL) ? strrchr(slash + 1, '-') : NULL;
    if (dash == NULL) {
#ifdef PYTHONC
        free(buf);
#else
        free(c);
#endif
        return NULL;
    }

    *slash = '\0';
    *dash = '\0';
    c->cat = buf;
    c->pkg = slash + 1;
    c->ver = dash + 1;
    return c;
}

/*
 * tp_init implementation for the CPV type.
 *
 * Expects exactly one string argument and hands it to cpv_alloc together
 * with the instance to fill in. kwds is accepted but not used.
 *
 * Returns 0 on success. Returns -1 with a Python exception set when the
 * arguments do not parse (PyArg_ParseTuple sets it) or when cpv_alloc
 * returns NULL (ValueError, unless an exception is already pending).
 */
static int
cpv_init(cpv *self, PyObject *args, PyObject *kwds)
{
    char *cpvstr;

    if (!PyArg_ParseTuple(args, "s", &cpvstr))
        return -1;
    if (!cpv_alloc(cpvstr, self)) {
        /* Returning -1 without a pending exception is an error in itself,
         * so raise one instead of only printing to stdout. */
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_ValueError, "Invalid cpv");
        return -1;
    }
    return 0;
}

/* Method table; it holds nothing but the terminating sentinel entry. */
static PyMethodDef cpv_methods[] = {
    {NULL}  /* Sentinel */
};

/*
 * Member table: maps the cat, pkg and ver fields of the cpv struct to the
 * names "category", "package" and "version", each declared as a T_STRING
 * member with the READONLY flag. The {NULL} entry terminates the table.
 */
static PyMemberDef cpv_members[] = {
    {"category", T_STRING, offsetof(cpv, cat), READONLY},
    {"package",  T_STRING, offsetof(cpv, pkg), READONLY},
    {"version",  T_STRING, offsetof(cpv, ver), READONLY},
    {NULL}
};


static PyTypeObject cpv_cpvType = {
    PyObject_HEAD_INIT(NULL)
    0,                         /*ob_size*/
    "cpv.CPV",                 /*tp_name*/
    sizeof(cpv),               /*tp_basicsize*/
    0,                         /*tp_itemsize*/
    0,                         /*tp_dealloc*/
    0,                         /*tp_print*/
    0,                         /*tp_getattr*/
    0,                         /*tp_setattr*/
    0,                         /*tp_compare*/
    0,                         /*tp_repr*/
    0,                         /*tp_as_number*/
    0,                         /*tp_as_sequence*/
    0,                         /*tp_as_mapping*/
    0,                         /*tp_hash */
    0,                         /*tp_call*/
    0,                         /*tp_str*/
    0,                         /*tp_getattro*/
    0,                         /*tp_setattro*/
    0,                         /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT,        /*tp_flags*/
    "cpv objects",             /* tp_doc */
    0,                     /* tp_traverse */
    0,                     /* tp_clear */
    0,                     /* tp_richcompare */
    0,                     /* tp_weaklistoffset */
    0,                     /* tp_iter */
    0,                     /* tp_iternext */
    cpv_methods,             /* tp_methods */
    cpv_members,             /* tp_members */
    0,                         /* tp_getset */
    0,                         /* tp_base */
    0,                         /* tp_dict */
    0,                         /* tp_descr_get */
    0,                         /* tp_descr_set */
    0,                         /* tp_dictoffset */
    (initproc)cpv_init,      /* tp_init */
};

PyMODINIT_FUNC
initcpv(void)
{
    PyObject* m;

    cpv_cpvType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&cpv_cpvType) < 0)
        return;

    m = Py_InitModule3("cpv", cpv_methods,
                       "Example module that creates an extension type.");

    Py_INCREF(&cpv_cpvType);
    PyModule_AddObject(m, "CPV", (PyObject *)&cpv_cpvType);
}

Though the question remains open for the case where I want to access a struct member that is itself a struct, since for Python it should be a PyObject *. I believe it’s possible to somehow hook into Python’s memory allocation and do the right thing without ugly ifdefs.

Published by

den4ix

Child hacker, unborn Gentoo dev =)