Skip to content

API Reference

compute(A, raw=False)

Compute the counts of the Fast Graphlet Transform.

Parameters:

Name Type Description Default
A Union[Graph, csc_matrix]

Either the graph as a networkx.Graph object or the adjacency matrix of the graph in scipy.sparse.csc_matrix format.

required
raw bool

If True, return both the net and the raw counts of the graphlets. If False, then return only the net counts. Defaults to False.

False

Accepts either an undirected, unweighted NetworkX graph or a CSC sparse matrix. If a NetworkX graph is provided, converts it to a CSC adjacency matrix. If a CSC matrix is provided, verifies that it is unweighted and symmetric.

Returns:

Name Type Description
F DataFrame

A dataframe with the net counts of the graphlets.

F_raw DataFrame

A dataframe with the raw counts of the graphlets (if raw=True).

Source code in pyfglt/fglt.py
@typechecked
def compute(A: Union[nx.Graph, csc_matrix], raw: bool = False) -> Union[pd.DataFrame, tuple[pd.DataFrame, pd.DataFrame]]:
    """Compute the counts of the Fast Graphlet Transform.

    Accepts either an undirected, unweighted NetworkX graph or a CSC sparse matrix.
    If a NetworkX graph is provided, converts it to a CSC adjacency matrix.
    If a CSC matrix is provided, verifies that it is unweighted and symmetric.

    Args:
        A (Union[nx.Graph, csc_matrix]): Either the graph as a `networkx.Graph` object
                                         or the adjacency matrix of the graph in `scipy.sparse.csc_matrix` format.
        raw (bool): If True, return both the net and the raw counts of the graphlets.
                    If False, then return only the net counts.
                    Defaults to False.

    Returns:
        F (DataFrame): A dataframe with the net counts of the graphlets.
        F_raw (DataFrame): A dataframe with the raw counts of the graphlets (if raw=True).
            When `raw=True` the result is the tuple `(F, F_raw)`, in that order.

    Raises:
        ValueError: If the NetworkX graph is directed, or if the CSC matrix is
            not symmetric or contains values other than 0 and 1.
        TypeError: If `A` is neither a NetworkX graph nor a CSC matrix.
    """

    if isinstance(A, nx.Graph):
        # nx.DiGraph is also an nx.Graph instance, so directedness is checked explicitly.
        if A.is_directed():
            raise ValueError("Graph must be undirected.")

        # NOTE(review): unlike the CSC branch, this path does not verify that the
        # graph is unweighted; nx.adjacency_matrix reads the "weight" edge
        # attribute by default -- confirm whether weighted graphs should be rejected.
        csc_adj = nx.adjacency_matrix(A).tocsc()

    elif issparse(A) and isinstance(A, csc_matrix):
        csc_adj = A  # already in the required format; used as-is (no copy)

        # Symmetric iff A - A.T has no non-zero entries.
        if abs(csc_adj - csc_adj.T).nnz != 0:
            raise ValueError("CSC matrix must be symmetric (undirected graph).")

        # Unweighted iff every stored value is 0 or 1.
        if not np.all(np.isin(csc_adj.data, [0, 1])):
            raise ValueError("CSC matrix must be unweighted (contain only 0s and 1s).")

    else:
        raise TypeError("Input must be either a NetworkX undirected graph or a CSC matrix.")

    raw_counts, net_counts = _fglt_c.count(csc_adj)

    # Cast to int64 and transpose so that rows are nodes and columns follow COLUMNS.
    # Uses the `np` alias, consistent with the rest of this function.
    raw_counts = raw_counts.astype(np.int64).T
    net_counts = net_counts.astype(np.int64).T

    F_raw = pd.DataFrame(raw_counts, columns=COLUMNS)
    F_net = pd.DataFrame(net_counts, columns=COLUMNS)

    F_raw.index.name = "Node id (0-based)"
    F_net.index.name = "Node id (0-based)"

    if raw:
        return F_net, F_raw
    return F_net

compute_gdd_agreement(df_g1, df_g2, bins=None)

Compute Graphlet Degree Distribution (GDD) agreement between two graphs.

Parameters:

Name Type Description Default
df_g1 DataFrame

Orbit counts for Graph 1 (rows=vertices, columns=orbits).

required
df_g2 DataFrame

Orbit counts for Graph 2 (rows=vertices, columns=orbits).

required
bins Union[int, sequence]

Bins for histogram. If None, will try an automatic approach.

None

Returns:

Name Type Description
s float

The GDD agreement in [0, 1].

Source code in pyfglt/fglt.py
@typechecked
def compute_gdd_agreement(df_g1: pd.DataFrame, df_g2:pd.DataFrame, bins=None):
    """
    Compute Graphlet Degree Distribution (GDD) agreement between two graphs.

    For every orbit (column of `df_g1`) the values of both graphs are
    histogrammed over the *same* bin edges, each histogram is normalized to
    probability mass per bin, and the overlap (sum of the bin-wise minimum) is
    taken. The agreement is the mean overlap across orbits.

    Args:
        df_g1 (pd.DataFrame): Orbit counts for Graph 1 (rows=vertices, columns=orbits).
        df_g2 (pd.DataFrame): Orbit counts for Graph 2 (rows=vertices, columns=orbits).
            Must contain every column of `df_g1`.
        bins (Union[int, sequence]): Bins for histogram. If None, unit-width bins
            centered on each integer from 0 up to the largest count in either
            frame are used. A scalar (number of bins, or a NumPy bin-estimator
            name) produces, per orbit, edges computed from the pooled values of
            both graphs. A sequence is used as explicit bin edges.

    Returns:
        s (float): The GDD agreement in [0, 1].
    """

    def _bin_probabilities(values, edges):
        # Probability mass per bin; normalizing by mass (not density) keeps the
        # overlap in [0, 1] regardless of the bin widths.
        counts, _ = np.histogram(values, bins=edges)
        total = counts.sum()
        if total == 0:
            return np.zeros(len(counts), dtype=float)
        return counts / total

    if bins is None:
        # One bin per integer value: edges at -0.5, 0.5, ..., max + 0.5.
        combined_max = max(df_g1.values.max(), df_g2.values.max())
        bins = np.arange(0, combined_max + 2) - 0.5

    # Explicit edges are shared by all orbits; a scalar spec is resolved per orbit.
    explicit_edges = None if np.ndim(bins) == 0 else np.asarray(bins)

    overlaps = []
    for orbit_col in df_g1.columns:
        values_g1 = df_g1[orbit_col].to_numpy()
        values_g2 = df_g2[orbit_col].to_numpy()

        if explicit_edges is None:
            # Derive the edges from both graphs together; letting np.histogram
            # pick a range per graph would compare unrelated bins.
            edges = np.histogram_bin_edges(
                np.concatenate([values_g1, values_g2]), bins=bins
            )
        else:
            edges = explicit_edges

        prob_g1 = _bin_probabilities(values_g1, edges)
        prob_g2 = _bin_probabilities(values_g2, edges)
        overlaps.append(np.sum(np.minimum(prob_g1, prob_g2)))

    # Average overlap across orbits
    return np.mean(overlaps)

compute_graphlet_correlation_matrix(df_g, method='spearman')

Compute the Graphlet Correlation Matrix (GCM) for a single graph.

Parameters:

Name Type Description Default
df_g DataFrame

Orbit counts for a graph (rows=vertices, columns=orbits).

required

Returns:

Name Type Description
C DataFrame

Correlation matrix of shape (n_orbits - 1, n_orbits - 1); the first column of df_g is excluded before the correlations are computed.

Source code in pyfglt/fglt.py
@typechecked
def compute_graphlet_correlation_matrix(df_g:pd.DataFrame, method='spearman'):
    """
    Compute the Graphlet Correlation Matrix (GCM) for a single graph.

    The first column of `df_g` is left out; pairwise correlations are computed
    between the remaining columns only.

    Args:
        df_g (pd.DataFrame): Orbit counts for a graph (rows=vertices, columns=orbits).
        method (str): Correlation method. Can be 'pearson', 'spearman', or 'kendall'.

    Returns:
        C (pd.DataFrame): Correlation matrix of shape (n_orbits - 1, n_orbits - 1),
            indexed and labelled by the retained column names.
    """
    # Everything except the leading column takes part in the correlation.
    orbit_columns = df_g.iloc[:, 1:]
    return orbit_columns.corr(method=method)

compute_rgf_distance(df_g1, df_g2)

Relative Graphlet Frequency (RGF)

Parameters:

Name Type Description Default
df_g1 DataFrame

Orbit counts for Graph 1 (rows=vertices, columns=orbits).

required
df_g2 DataFrame

Orbit counts for Graph 2 (rows=vertices, columns=orbits).

required

Compute the Relative Graphlet Frequency (RGF) distance between two graphs represented by DataFrames of orbit counts.

Returns:

Name Type Description
d float

The RGF distance between the two graphs.

Source code in pyfglt/fglt.py
@typechecked
def compute_rgf_distance(df_g1:pd.DataFrame, df_g2:pd.DataFrame) -> float:
    """Relative Graphlet Frequency (RGF) distance.

    Compute the Relative Graphlet Frequency (RGF) distance between two graphs
    represented by DataFrames of orbit counts. Each graph's per-orbit totals
    are divided by that graph's grand total to obtain relative frequencies;
    the distance is the sum of absolute differences between the two frequency
    vectors. A graph whose grand total is zero contributes all-zero frequencies.

    Args:
        df_g1 (pd.DataFrame): Orbit counts for Graph 1 (rows=vertices, columns=orbits).
        df_g2 (pd.DataFrame): Orbit counts for Graph 2 (rows=vertices, columns=orbits).

    Returns:
        d (float): The RGF distance between the two graphs.
    """

    # Sum of orbit counts across all vertices for each orbit
    orbit_sums_g1 = df_g1.sum(axis=0)  # Series of length = number_of_orbits
    orbit_sums_g2 = df_g2.sum(axis=0)

    # Compute total counts
    total_g1 = orbit_sums_g1.sum()
    total_g2 = orbit_sums_g2.sum()

    # Relative frequencies for each orbit (all zeros when the total is zero,
    # avoiding a division by zero)
    rel_freq_g1 = orbit_sums_g1 / total_g1 if total_g1 != 0 else orbit_sums_g1 * 0
    rel_freq_g2 = orbit_sums_g2 / total_g2 if total_g2 != 0 else orbit_sums_g2 * 0

    # RGF distance = sum of absolute differences.
    # float() guarantees the annotated return type: when both totals are zero
    # and the inputs are integer-typed, the sum would otherwise be a NumPy integer.
    return float(np.sum(np.abs(rel_freq_g1 - rel_freq_g2)))

gcm_distance(gcm1, gcm2)

Compute a simple distance between two correlation matrices. For instance, the sum of absolute differences (L1 distance).

Parameters:

Name Type Description Default
gcm1 DataFrame

GCM of the first graph

required
gcm2 DataFrame

GCM of the second graph

required

Returns:

Name Type Description
d float

A distance measure between the two GCMs.

Source code in pyfglt/fglt.py
@typechecked
def gcm_distance(gcm1:pd.DataFrame, gcm2:pd.DataFrame):
    """
    Compute a simple distance between two correlation matrices.

    The distance is the entry-wise L1 distance: the sum of the absolute
    differences between corresponding entries of the two matrices. Entries are
    matched by position, not by row/column label.

    Args:
        gcm1 (pd.DataFrame): GCM of the first graph
        gcm2 (pd.DataFrame): GCM of the second graph

    Returns:
        d (float): A distance measure between the two GCMs.
    """
    # Work on the raw arrays so the subtraction is positional.
    first, second = gcm1.values, gcm2.values
    return np.abs(first - second).sum()