Support for Cross-Language Garbage Collection

The following is the minimum API required to support cross-language garbage collection (CLGC).

/**
 * Macro: PyGC_VISIT_DFS
 * ---------------------
 * Performs one step of a depth-first search (DFS) traversal for garbage
 * collection. The macro flips the PREV_MASK_COLLECTING bit in the _gc_prev
 * field of the object's garbage collection header (PyGC_Head) so the object
 * is marked as visited while the traversal runs, calls the tp_traverse slot
 * of the object's type to visit its references, and then sets the bit again.
 *
 * Parameters:
 * - op: A pointer to the Python object being visited (PyObject*).
 * - visit: The callback function to be applied during traversal (visitproc).
 * - arg: Additional argument passed through to the callback function.
 *
 * Notes:
 * - Expands to a single statement (do { ... } while (0)); terminate it with a
 *   semicolon. This keeps it safe inside an unbraced if/else.
 * - op is evaluated exactly once, and the type is obtained with Py_TYPE(op),
 *   so the macro does not rely on any variable declared at the call site.
 * - The first step is a toggle (^=): it clears the flag only when the flag is
 *   set on entry. The flag is always set when the macro finishes.
 * - No checks are performed: op must carry a GC header and its type must
 *   provide tp_traverse.
 * - The value returned by tp_traverse is discarded.
 *
 * Usage:
 * - This macro is used in the internal garbage collection process to analyze
 *   object references and determine reachability.
 */
#define PyGC_VISIT_DFS(op, visit, arg) \
  do { \
    PyObject *pygc_dfs_op_ = (PyObject *)(op); \
    AS_GC(pygc_dfs_op_)->_gc_prev ^= PREV_MASK_COLLECTING; \
    Py_TYPE(pygc_dfs_op_)->tp_traverse(pygc_dfs_op_, (visit), (arg)); \
    AS_GC(pygc_dfs_op_)->_gc_prev |= PREV_MASK_COLLECTING; \
  } while (0)
 
/**
 * Typedef: clgcfunc
 * ------------------
 * Signature of the reference-manager callback. Python's garbage collector
 * calls it at several points of a generation 2 collection cycle so that
 * foreign objects and the references they hold can take part in the cycle.
 *
 * Function Signature:
 * - int (*clgcfunc)(int phase, visitproc visit, void *args);
 *
 * Parameters:
 * - phase: Which point of the collection cycle has been reached.
 *   - 0: A new collection cycle is starting.
 *   - 1: The decref phase has finished; objects left with zero external
 *     references are candidates for collection. Visit foreign objects here
 *     so they are treated like normal objects.
 *   - 2: Reachability analysis has finished; objects that are not reachable
 *     by now will be collected. Recover any foreign objects that are still
 *     needed here.
 *   - 3: The collection cycle has finished.
 * - visit: The traversal callback (visitproc) to apply to object references.
 * - args: Opaque argument to hand to the visit callback, typically used for
 *   context or state management.
 *
 * Returns:
 * - 0 on success.
 * - A non-zero value to report an error or another specific condition.
 *
 * Usage:
 * - Write a function of this type to act as the reference manager for
 *   Python's garbage collector. It is expected to take care of tracking and
 *   cleaning up foreign objects in the phases listed above.
 */
typedef int (*clgcfunc)(int phase, visitproc visit, void *args);

/**
 * Function: PyGC_IsReachable
 * --------------------------
 * Reports whether a given Python object was found to be reachable by the
 * reachability phase of the garbage collector.
 *
 * This is a declaration only; the definition is not part of this excerpt.
 *
 * Parameters:
 * - obj: A pointer to the Python object (PyObject*) being checked.
 *
 * Returns:
 * - 1 if the object is reachable.
 * - 0 if the object is not reachable.
 *
 * Notes:
 * - Call this function only at the end of the reachability phase (phase 2 of
 *   the reference manager callback). At any other point of the GC cycle the
 *   result is undefined or unexpected.
 *
 * Usage:
 * - Use this function to check whether an object has been marked as
 *   reachable during garbage collection.
 */
int PyGC_IsReachable(PyObject *obj);

/**
 * Function: PyGC_InstallReferenceManager
 * --------------------------------------
 * Registers the reference manager callback for the Python interpreter. Once
 * registered, the callback takes part in generation 2 garbage collection
 * cycles so that foreign objects can be tracked and managed.
 *
 * Parameters:
 * - manager: The callback (clgcfunc) to invoke at the different phases of
 *   the garbage collection process. Its signature is:
 *   int manager(int phase, visitproc visit, void *args)
 *   - phase: The current phase of the garbage collection process.
 *   - visit: The traversal callback to use during the GC process.
 *   - args: Additional argument to pass to the visit callback.
 *
 * Returns:
 * - 0 on success.
 * - -1 if a reference manager is already installed; a Python RuntimeError
 *   is set in that case.
 *
 * Notes:
 * - At most one reference manager can be installed. The callback is stored
 *   in reference_manager, which is declared outside this block.
 * - manager is not checked for NULL: passing NULL stores NULL and returns 0,
 *   which leaves the slot looking empty to a later call.
 * - The reference manager is responsible for proper tracking and cleanup of
 *   foreign objects during garbage collection.
 *
 * Usage:
 * - Call this function to hook custom foreign object tracking into Python's
 *   garbage collector.
 */
int PyGC_InstallReferenceManager(clgcfunc manager)
{
    /* The slot is free: take it. */
    if (reference_manager == NULL) {
        reference_manager = manager;
        return 0;
    }

    /* A manager is already registered; refuse to replace it. */
    PyErr_SetString(PyExc_RuntimeError, "Only one reference manager allowed");
    return -1;
}

The following example shows how this API may be used.

/*
 * Depth-first search step built on Python's traversal mechanism.
 *
 * The function has the visitproc shape, so it can be handed to tp_traverse
 * and is called once for each referent of the object being traversed. It is
 * used to relate foreign incoming references to foreign outgoing ones.
 *
 * Parameters:
 * - op: The object to examine.
 * - arg: Opaque value forwarded to internalize_add and to nested calls.
 *
 * Returns:
 * - Always 0, so the enclosing tp_traverse call is never cut short.
 *
 * Behavior:
 * - Objects that PyGC_IsReachable reports as reachable are skipped.
 * - Objects whose type uses Foreign_free as tp_free are passed to
 *   internalize_add and not descended into.
 * - Every other object is descended into with PyGC_VISIT_DFS, provided it
 *   is a GC object with a tp_traverse slot.
 */
static int internalize_trace(PyObject *op, void *arg) {
    if (PyGC_IsReachable(op)) {
        return 0;
    }

    /* Keep this local named `tp`: PyGC_VISIT_DFS may expand to code that
     * expects a variable of that name at the call site. */
    PyTypeObject* tp = Py_TYPE(op);

    // A foreign reference ends this branch of the search.
    if (tp->tp_free == Foreign_free) {
        internalize_add(op, arg);
        return 0;
    }

    /* tp_traverse reports every referent, including objects that are not
     * GC objects. Those have no PyGC_Head, and PyGC_VISIT_DFS writes to
     * that header and calls tp_traverse unconditionally, so stop here
     * instead of descending into them. */
    if (!PyObject_IS_GC(op) || tp->tp_traverse == NULL) {
        return 0;
    }

    // Continue the DFS below objects bound for garbage collection.
    PyGC_VISIT_DFS(op, internalize_trace, arg);
    return 0;
}

/*
 * Analyze linkages between foreign references and Python objects.
 *
 * Walks the `references` list and, for the local_object of each entry, runs
 * internalize_start, a DFS with internalize_trace, and internalize_end, all
 * with a NULL argument. The purpose is to discover reference loops.
 *
 * The only call site in this file is the phase 2 branch of
 * ReferenceManager_trigger, i.e. once reachability analysis has completed.
 * internalize_trace relies on PyGC_IsReachable, which is documented as valid
 * only at that point.
 *
 * NOTE(review): the loop stops when it gets back to &references, so the list
 * is presumably circular with `references` as its sentinel head -- confirm
 * against the list's definition.
 */
static void internalize_analyze(void) {
    /* The next pointer is read only after the three calls have returned,
     * in the same order as the loop has always done it. */
    for (ForeignReference *current = references.next;
         current != &references;
         current = current->next) {
        PyObject *op = current->local_object;

        // Perform DFS traversal to find reference loops
        internalize_start(op, NULL);
        internalize_trace(op, NULL);
        internalize_end(op, NULL);
    }
}

/**
 * Sample reference manager callback showing how the hooks are used.
 *
 * Matches the clgcfunc signature. On every call it prints the phase number
 * and then updates the `renew` and `skip` flags for that phase:
 *
 * - phase 0 (cycle starting): clears renew and skip.
 * - phase 1 (decref phase completing): visits the references, then sets
 *   renew.
 * - phase 2 (reachability analysis completing): clears renew, runs
 *   internalize_analyze, sets skip, then visits the references again.
 * - phase 3 (cycle ended): clears renew and skip.
 * - any other value: nothing beyond the print.
 *
 * Parameters:
 * - phase: The GC phase being reported.
 * - visit: Traversal callback, forwarded to visit_references.
 * - args: Opaque argument, forwarded to visit_references.
 *
 * Returns:
 * - Always 0.
 */
int ReferenceManager_trigger(int phase, visitproc visit, void* args)
{
    printf("trigger %d\n", phase);

    switch (phase) {
    case 0:
        /* A new GC cycle is beginning: start from a clean state. */
        renew = 0;
        skip = 0;
        break;

    case 1:
        /* The decref phase is completing. */
        visit_references(visit, args);

        /* From here on, any reachable foreign object should renew or
         * request a lease. */
        renew = 1;
        break;

    case 2:
        /* Reachability analysis is completing. */
        renew = 0;

        /* Analyze reachability and inform Java of disconnected segments. */
        internalize_analyze();

        skip = 1;

        /* Add our references to the reachable set so they stay alive
         * until Java terminates them. */
        visit_references(visit, args);
        break;

    case 3:
        /* The GC cycle has ended. This is the point at which Java would
         * be told that new leases are in force; the sample only resets
         * its flags. */
        renew = 0;
        skip = 0;
        break;

    default:
        /* Unknown phase: nothing to do. */
        break;
    }

    return 0;
}