Source code for spatialtis.spatial.nmd_markers

from __future__ import annotations

import numpy as np
import pandas as pd
from anndata import AnnData
from ast import literal_eval
from scipy.stats import spearmanr
from typing import Dict, List

from spatialtis.abc import AnalysisBase
from spatialtis.utils import doc, pbar_iter, read_exp


@doc
def NMD_marker(
    data: AnnData,
    pval: float = 0.01,
    selected_markers: List[str] | np.ndarray = None,
    importance_cutoff: float = 0.5,
    layer_key: str = None,
    tree_kwargs: Dict = None,
    export_key: str = "nmd_marker",
    **kwargs,
):
    """Identify neighbor-dependent markers

    The neighborhood of each cell is treated as a single cell:
    its expression is the summed expression of all neighboring cells.

    Parameters
    ----------
    data : {adata}
    importance_cutoff : float
        Threshold on the normalized feature importance; a marker is reported
        only if its most important neighbor marker exceeds this value.
    pval : {pval}
    selected_markers : {selected_markers}
    layer_key : {layers_key}
    tree_kwargs : dict
        Keyword arguments passed to the boosting tree class
        (Default: n_jobs=-1, random_state=0, importance_type="gain").
    export_key : {export_key}
    **kwargs : {analysis_kwargs}

    """
    try:
        from lightgbm import LGBMRegressor
    except ImportError:
        raise ImportError(
            "lightgbm is not installed, please try `pip install lightgbm`."
        )

    ab = AnalysisBase(data, display_name="NMD marker", export_key=export_key, **kwargs)
    ab.check_neighbors()

    # default parameters for the boosting tree, overridable via tree_kwargs
    tree_kwargs_ = {"n_jobs": -1, "random_state": 0, "importance_type": "gain"}
    if tree_kwargs is not None:
        for k, v in tree_kwargs.items():
            tree_kwargs_[k] = v

    markers = ab.selected_markers(selected_markers)
    markers_mask = ab.markers_col.isin(markers)

    neighbors = [literal_eval(n) for n in data.obsm[ab.neighbors_key]]
    # expression of the center cells (one row per marker)
    cent_exp = read_exp(data[:, markers_mask], layer_key)
    # treat the neighbors as a single cell:
    # sum the expression of all neighboring cells
    neigh_exp = np.asarray(
        [read_exp(data[n, markers_mask], layer_key).sum(1) for n in neighbors]
    )

    results_data = []
    for i, y in enumerate(pbar_iter(cent_exp, desc="Neighbor-dependent markers")):
        # regress the center-cell expression of marker i on the
        # neighborhood expression of all markers
        reg = LGBMRegressor(**tree_kwargs_).fit(neigh_exp, y)
        weights = np.asarray(reg.feature_importances_)
        ws = weights.sum()
        if ws != 0:
            weights = weights / ws
        max_ix = np.argmax(weights)
        max_weight = weights[max_ix]
        if max_weight > importance_cutoff:
            # confirm the dependency with a Spearman correlation test
            r, pvalue = spearmanr(y, neigh_exp[:, max_ix])
            if pvalue < pval:
                results_data.append(
                    [markers[i], markers[max_ix], max_weight, r, pvalue]
                )

    ab.result = pd.DataFrame(
        data=results_data,
        columns=["marker", "neighbor_marker", "dependency", "corr", "pval"],
    )
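
A minimal usage sketch (not part of the module above). The input file name, the package-level imports, and the `find_neighbors` call are assumptions for illustration and may differ between spatialtis versions; the function itself only requires an AnnData object with a precomputed neighbor graph (which `check_neighbors()` verifies).

from anndata import read_h5ad
import spatialtis as st  # assumes NMD_marker and find_neighbors are re-exported at the top level

adata = read_h5ad("cells.h5ad")  # hypothetical input: AnnData with single-cell spatial data

# NMD_marker needs neighbor information in `.obsm`; the call below is the
# assumed neighbor-search step and may be named differently in your version.
st.find_neighbors(adata)

st.NMD_marker(
    adata,
    pval=0.01,                           # significance cutoff for the Spearman test
    importance_cutoff=0.5,               # minimum normalized feature importance to report a pair
    tree_kwargs={"n_estimators": 200},   # forwarded to LGBMRegressor
    export_key="nmd_marker",
)

# The result is a DataFrame with columns
# ["marker", "neighbor_marker", "dependency", "corr", "pval"],
# exported under `export_key` (typically retrievable from the AnnData object).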