Source code for sccellfie.communication.traditional_scoring

import numpy as np
import pandas as pd
from itertools import product

from sccellfie.expression.aggregation import agg_expression_cells


def compute_communication_scores(adata, groupby, var_pairs, communication_score='gmean', agg_func='mean',
                                 layer=None, ligand_threshold=0, receptor_threshold=0):
    """
    Computes communication scores between pairs of features or variables
    (normally representing ligand-receptor pairs) across different cell types.

    Parameters
    ----------
    adata : AnnData
        AnnData object containing expression data and grouping information

    groupby : str
        Column in adata.obs for grouping cells to aggregate expression.

    var_pairs : list of tuples
        List of (var1, var2) pairs (normally representing ligand-receptor pairs).
        Any iterable of pairs is accepted; it is materialized into a list once.

    communication_score : str, default='gmean'
        Method to compute communication scores. Options are:
        - 'gmean': geometric mean (sqrt(x * y))
        - 'product': simple multiplication (x * y)
        - 'mean': arithmetic mean ((x + y) / 2)

    agg_func : str, default='mean'
        Aggregation function for aggregating expression values across cells.
        Options are 'mean', 'median', '25p' (25th percentile), '75p' (75th percentile),
        'trimean' (0.5*Q2 + 0.25(Q1+Q3)), and 'topmean'.

    layer : str, optional
        Layer in adata to use for aggregation. If None, the main expression matrix
        adata.X is used.

    ligand_threshold : float, default=0
        Threshold for calculating the fraction of cells expressing the ligand.
        Only cells with expression above this threshold are considered as
        expressing the ligand.

    receptor_threshold : float, default=0
        Threshold for calculating the fraction of cells expressing the receptor.
        Only cells with expression above this threshold are considered as
        expressing the receptor.

    Returns
    -------
    ccc_scores : pandas.DataFrame
        DataFrame containing the communication scores between cell types for each
        variable pair, one row per (sender, receiver, ligand, receptor) combination.
        Columns are:
        - sender_celltype: type of the sender cell
        - receiver_celltype: type of the receiver cell
        - ligand: name of the ligand
        - receptor: name of the receptor
        - score: communication score
        - ligand_fraction: fraction of sender cells expressing the ligand
        - receptor_fraction: fraction of receiver cells expressing the receptor

    Raises
    ------
    ValueError
        If var_pairs is empty, or if any variable in var_pairs is not present in
        adata.var_names.

    KeyError
        If communication_score is not one of the keys of CCC_FUNC.
    """
    # Materialize once: the pairs are traversed several times below, and a
    # one-shot iterable would be exhausted by the first pass, silently
    # yielding an empty result.
    var_pairs = list(var_pairs)
    if not var_pairs:
        raise ValueError('var_pairs must contain at least one (ligand, receptor) pair.')

    # Split variable pairs
    vars1, vars2 = zip(*var_pairs)

    # Check if variables are present in adata. dict.fromkeys removes repeats
    # while keeping input order, so the error message is deterministic.
    missing_vars = [var for var in dict.fromkeys(vars1 + vars2) if var not in adata.var_names]
    if missing_vars:
        raise ValueError(f'Variables not found in adata.var_names: {missing_vars}')

    # Resolve the scoring function once, before any aggregation work, so an
    # unknown option fails fast instead of after the expensive steps.
    try:
        score_func = CCC_FUNC[communication_score]
    except KeyError:
        raise KeyError(
            f"Unknown communication_score '{communication_score}'. "
            f"Valid options are: {list(CCC_FUNC)}"
        ) from None

    # Aggregate expression for scores
    agg_df = agg_expression_cells(adata, groupby, layer=layer, agg_func=agg_func)
    cell_types = agg_df.index.unique()

    # A ligand/receptor may take part in several pairs. Request each name only
    # once: a repeated column label would make `.loc[row, col]` below return a
    # Series instead of a scalar.
    # NOTE(review): assumes the helper returns one column per requested symbol - confirm.
    ligands = list(dict.fromkeys(vars1))
    receptors = list(dict.fromkeys(vars2))

    # Calculate fraction of cells expressing above threshold
    ligand_fractions = agg_expression_cells(
        adata, groupby, layer=layer, gene_symbols=ligands,
        agg_func='fraction_above', threshold=ligand_threshold
    )
    receptor_fractions = agg_expression_cells(
        adata, groupby, layer=layer, gene_symbols=receptors,
        agg_func='fraction_above', threshold=receptor_threshold
    )

    # Calculate scores for each ordered (sender, receiver) combination of
    # groups, including sender == receiver, and each variable pair
    results = []
    for sender, receiver in product(cell_types, cell_types):
        for ligand, receptor in var_pairs:
            results.append({
                'sender_celltype': sender,
                'receiver_celltype': receiver,
                'ligand': ligand,
                'receptor': receptor,
                'score': score_func(agg_df.loc[sender, ligand], agg_df.loc[receiver, receptor]),
                'ligand_fraction': ligand_fractions.loc[sender, ligand],
                'receptor_fraction': receptor_fractions.loc[receiver, receptor],
            })

    # Passing the columns explicitly keeps the documented schema even when
    # there are no groups and therefore no rows.
    columns = ['sender_celltype', 'receiver_celltype', 'ligand', 'receptor',
               'score', 'ligand_fraction', 'receptor_fraction']
    ccc_scores = pd.DataFrame(results, columns=columns)
    return ccc_scores
def _gmean_score(x, y):
    """Geometric mean of two expression values: sqrt(x * y)."""
    return np.sqrt(x * y)


def _product_score(x, y):
    """Plain product of two expression values: x * y."""
    return x * y


def _mean_score(x, y):
    """Arithmetic mean of two expression values: (x + y) / 2."""
    return (x + y) / 2


# Lookup table from communication-score name to the two-argument function
# that combines a ligand value and a receptor value into a single score.
CCC_FUNC = dict(
    gmean=_gmean_score,
    product=_product_score,
    mean=_mean_score,
)