Getting started¶
[7]:
import pyroma
roma = pyroma.ROMA()
roma
[7]:
scROMA
[ ]:
import os
import zipfile
import io
import numpy as np
import pandas as pd
import scanpy as sc
import scipy.io
from glob import glob
[ ]:
adata = pyroma.datasets.pbmc3k()
adata
standard preprocessing¶
[6]:
sc.pp.filter_cells(adata, min_genes=100)
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
[7]:
adata
[7]:
AnnData object with n_obs × n_vars = 2700 × 13714
obs: 'celltype', 'n_genes'
var: 'gene_ids', 'n_cells'
uns: 'log1p'
run pyroma¶
In order to load your custom genesets, or any publicly available .gmt file indicate the absolute path for roma.gmt
[ ]:
hallmarks_gmt_path = pyroma.genesets.use_hallmarks()
[ ]:
import time
roma.gmt = hallmarks_gmt_path
roma.adata = adata
roma.pc_sign_mode = 'UseMeanExpressionAllWeights'
pathways_to_check = 'all' #['HALLMARK_NOTCH_SIGNALING', 'HALLMARK_PANCREAS_BETA_CELLS', 'HALLMARK_TGF_BETA_SIGNALING' ]
iters = 100
start = time.time()
roma.compute(pathways_to_check,
parallel=True,
loocv_on=True,
iters=iters,
verbose=0)
end = time.time()
minutes, seconds = divmod(end - start, 60)
print(f"CPU Parallel {iters} iterations for shape {adata.shape}, running time (min): " + "{:0>2}:{:05.2f}".format(int(minutes),seconds))
Processing gene set: HALLMARK_NOTCH_SIGNALING | len of subsetlist: 25
Processing gene set: HALLMARK_HEDGEHOG_SIGNALING | len of subsetlist: 17
Processing gene set: HALLMARK_ANGIOGENESIS | len of subsetlist: 15
Processing gene set: HALLMARK_PANCREAS_BETA_CELLS | len of subsetlist: 14
Processing gene set: HALLMARK_WNT_BETA_CATENIN_SIGNALING | len of subsetlist: 33
Processing gene set: HALLMARK_APICAL_SURFACE | len of subsetlist: 26
Processing gene set: HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY | len of subsetlist: 47
Processing gene set: HALLMARK_TGF_BETA_SIGNALING | len of subsetlist: 47
Processing gene set: HALLMARK_MYC_TARGETS_V2 | len of subsetlist: 57
Processing gene set: HALLMARK_CHOLESTEROL_HOMEOSTASIS | len of subsetlist: 61
Processing gene set: HALLMARK_IL6_JAK_STAT3_SIGNALING | len of subsetlist: 71
Processing gene set: HALLMARK_PROTEIN_SECRETION | len of subsetlist: 88
Processing gene set: HALLMARK_INTERFERON_ALPHA_RESPONSE | len of subsetlist: 93
Processing gene set: HALLMARK_ANDROGEN_RESPONSE | len of subsetlist: 82
Processing gene set: HALLMARK_PEROXISOME | len of subsetlist: 86
Processing gene set: HALLMARK_PI3K_AKT_MTOR_SIGNALING | len of subsetlist: 91
Processing gene set: HALLMARK_BILE_ACID_METABOLISM | len of subsetlist: 73
Processing gene set: HALLMARK_UNFOLDED_PROTEIN_RESPONSE | len of subsetlist: 102
Processing gene set: HALLMARK_SPERMATOGENESIS | len of subsetlist: 71
Processing gene set: HALLMARK_COAGULATION | len of subsetlist: 75
Processing gene set: HALLMARK_UV_RESPONSE_DN | len of subsetlist: 100
Processing gene set: HALLMARK_DNA_REPAIR | len of subsetlist: 142
Processing gene set: HALLMARK_FATTY_ACID_METABOLISM | len of subsetlist: 127
Processing gene set: HALLMARK_UV_RESPONSE_UP | len of subsetlist: 124
Processing gene set: HALLMARK_APOPTOSIS | len of subsetlist: 142
Processing gene set: HALLMARK_MITOTIC_SPINDLE | len of subsetlist: 179
Processing gene set: HALLMARK_IL2_STAT5_SIGNALING | len of subsetlist: 164
Processing gene set: HALLMARK_TNFA_SIGNALING_VIA_NFKB | len of subsetlist: 172
Processing gene set: HALLMARK_HYPOXIA | len of subsetlist: 138
Processing gene set: HALLMARK_G2M_CHECKPOINT | len of subsetlist: 174
Processing gene set: HALLMARK_ADIPOGENESIS | len of subsetlist: 160
Processing gene set: HALLMARK_ESTROGEN_RESPONSE_EARLY | len of subsetlist: 126
Processing gene set: HALLMARK_ESTROGEN_RESPONSE_LATE | len of subsetlist: 125
Processing gene set: HALLMARK_MYOGENESIS | len of subsetlist: 112
Processing gene set: HALLMARK_INTERFERON_GAMMA_RESPONSE | len of subsetlist: 187
Processing gene set: HALLMARK_APICAL_JUNCTION | len of subsetlist: 119
Processing gene set: HALLMARK_COMPLEMENT | len of subsetlist: 159
Processing gene set: HALLMARK_MTORC1_SIGNALING | len of subsetlist: 187
Processing gene set: HALLMARK_E2F_TARGETS | len of subsetlist: 181
Processing gene set: HALLMARK_MYC_TARGETS_V1 | len of subsetlist: 192
Processing gene set: HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION | len of subsetlist: 93
Processing gene set: HALLMARK_INFLAMMATORY_RESPONSE | len of subsetlist: 149
Processing gene set: HALLMARK_XENOBIOTIC_METABOLISM | len of subsetlist: 131
Processing gene set: HALLMARK_OXIDATIVE_PHOSPHORYLATION | len of subsetlist: 180
Processing gene set: HALLMARK_GLYCOLYSIS | len of subsetlist: 145
Processing gene set: HALLMARK_P53_PATHWAY | len of subsetlist: 160
Processing gene set: HALLMARK_HEME_METABOLISM | len of subsetlist: 154
Processing gene set: HALLMARK_ALLOGRAFT_REJECTION | len of subsetlist: 167
Processing gene set: HALLMARK_KRAS_SIGNALING_UP | len of subsetlist: 125
Processing gene set: HALLMARK_KRAS_SIGNALING_DN | len of subsetlist: 66
Finished : CPU Parallel 100 iterations for shape (2700, 13714), running time (min): 01:00.46
[10]:
roma
[10]:
scROMA: module activities are computed
[11]:
roma.adata
[11]:
AnnData object with n_obs × n_vars = 2700 × 13714
obs: 'celltype', 'n_genes'
var: 'gene_ids', 'n_cells'
uns: 'log1p', 'ROMA', 'ROMA_stats', 'ROMA_active_modules'
all the pyroma statistics¶
[12]:
roma.adata.uns['ROMA_stats']
[12]:
| L1 | ppv L1 | Median Exp | ppv Med Exp | q L1 | q Med Exp | |
|---|---|---|---|---|---|---|
| HALLMARK_NOTCH_SIGNALING | 0.308422 | 0.38 | -0.046337 | 0.356436 | 0.850000 | 0.990099 |
| HALLMARK_HEDGEHOG_SIGNALING | 0.436227 | 0.09 | -0.043306 | 0.405941 | 0.833333 | 0.990099 |
| HALLMARK_ANGIOGENESIS | 0.682129 | 0.00 | -0.092545 | 0.089109 | 0.000000 | 0.636492 |
| HALLMARK_PANCREAS_BETA_CELLS | 0.329636 | 0.34 | -0.002735 | 0.970297 | 0.850000 | 0.990099 |
| HALLMARK_WNT_BETA_CATENIN_SIGNALING | 0.168701 | 0.91 | 0.027318 | 0.574257 | 1.000000 | 0.990099 |
| HALLMARK_APICAL_SURFACE | 0.273708 | 0.50 | 0.015345 | 0.693069 | 0.850000 | 0.990099 |
| HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY | 0.295123 | 0.16 | 0.010931 | 0.891089 | 0.850000 | 0.990099 |
| HALLMARK_TGF_BETA_SIGNALING | 0.234388 | 0.45 | -0.024408 | 0.722772 | 0.850000 | 0.990099 |
| HALLMARK_MYC_TARGETS_V2 | 0.190350 | 0.67 | 0.193093 | 0.009901 | 1.000000 | 0.165017 |
| HALLMARK_CHOLESTEROL_HOMEOSTASIS | 0.223410 | 0.51 | -0.100565 | 0.089109 | 0.850000 | 0.636492 |
| HALLMARK_IL6_JAK_STAT3_SIGNALING | 0.241695 | 0.17 | -0.072523 | 0.316832 | 0.850000 | 0.990099 |
| HALLMARK_PROTEIN_SECRETION | 0.072113 | 1.00 | -0.062006 | 0.405941 | 1.000000 | 0.990099 |
| HALLMARK_INTERFERON_ALPHA_RESPONSE | 0.149181 | 0.46 | 0.140216 | 0.019802 | 0.850000 | 0.247525 |
| HALLMARK_ANDROGEN_RESPONSE | 0.188237 | 0.43 | -0.077837 | 0.277228 | 0.850000 | 0.990099 |
| HALLMARK_PEROXISOME | 0.091941 | 0.98 | -0.026166 | 0.772277 | 1.000000 | 0.990099 |
| HALLMARK_PI3K_AKT_MTOR_SIGNALING | 0.119006 | 0.85 | 0.053214 | 0.445545 | 1.000000 | 0.990099 |
| HALLMARK_BILE_ACID_METABOLISM | 0.134706 | 0.81 | -0.041943 | 0.623762 | 1.000000 | 0.990099 |
| HALLMARK_UNFOLDED_PROTEIN_RESPONSE | 0.096724 | 0.96 | -0.070962 | 0.435644 | 1.000000 | 0.990099 |
| HALLMARK_SPERMATOGENESIS | 0.204142 | 0.31 | -0.050981 | 0.485149 | 0.850000 | 0.990099 |
| HALLMARK_COAGULATION | 0.185626 | 0.43 | -0.015626 | 0.871287 | 0.850000 | 0.990099 |
| HALLMARK_UV_RESPONSE_DN | 0.189827 | 0.18 | -0.044129 | 0.752475 | 0.850000 | 0.990099 |
| HALLMARK_DNA_REPAIR | 0.041673 | 1.00 | -0.028492 | 0.881188 | 1.000000 | 0.990099 |
| HALLMARK_FATTY_ACID_METABOLISM | 0.153207 | 0.30 | -0.055761 | 0.742574 | 0.850000 | 0.990099 |
| HALLMARK_UV_RESPONSE_UP | 0.123632 | 0.47 | 0.051468 | 0.782178 | 0.850000 | 0.990099 |
| HALLMARK_APOPTOSIS | 0.148120 | 0.33 | -0.064821 | 0.653465 | 0.850000 | 0.990099 |
| HALLMARK_MITOTIC_SPINDLE | 0.059667 | 0.98 | -0.047615 | 0.801980 | 1.000000 | 0.990099 |
| HALLMARK_IL2_STAT5_SIGNALING | 0.129669 | 0.34 | 0.052441 | 0.782178 | 0.850000 | 0.990099 |
| HALLMARK_TNFA_SIGNALING_VIA_NFKB | 0.109175 | 0.46 | -0.109271 | 0.257426 | 0.850000 | 0.990099 |
| HALLMARK_HYPOXIA | 0.150902 | 0.32 | -0.105616 | 0.217822 | 0.850000 | 0.990099 |
| HALLMARK_G2M_CHECKPOINT | 0.073522 | 0.90 | 0.004057 | 0.980198 | 1.000000 | 0.990099 |
| HALLMARK_ADIPOGENESIS | 0.059627 | 0.99 | 0.040987 | 0.831683 | 1.000000 | 0.990099 |
| HALLMARK_ESTROGEN_RESPONSE_EARLY | 0.165805 | 0.25 | 0.006120 | 0.990099 | 0.850000 | 0.990099 |
| HALLMARK_ESTROGEN_RESPONSE_LATE | 0.262778 | 0.06 | -0.074091 | 0.544554 | 0.833333 | 0.990099 |
| HALLMARK_MYOGENESIS | 0.130965 | 0.63 | 0.044766 | 0.742574 | 1.000000 | 0.990099 |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 0.141345 | 0.30 | 0.137520 | 0.039604 | 0.850000 | 0.396040 |
| HALLMARK_APICAL_JUNCTION | 0.160732 | 0.39 | -0.106446 | 0.267327 | 0.850000 | 0.990099 |
| HALLMARK_COMPLEMENT | 0.251886 | 0.08 | -0.015366 | 0.950495 | 0.833333 | 0.990099 |
| HALLMARK_MTORC1_SIGNALING | 0.080113 | 0.84 | -0.088205 | 0.465347 | 1.000000 | 0.990099 |
| HALLMARK_E2F_TARGETS | 0.052116 | 0.98 | 0.017490 | 0.920792 | 1.000000 | 0.990099 |
| HALLMARK_MYC_TARGETS_V1 | 0.083975 | 0.80 | -0.264613 | 0.009901 | 1.000000 | 0.165017 |
| HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION | 0.268412 | 0.10 | -0.062662 | 0.514851 | 0.833333 | 0.990099 |
| HALLMARK_INFLAMMATORY_RESPONSE | 0.116391 | 0.49 | -0.059221 | 0.693069 | 0.850000 | 0.990099 |
| HALLMARK_XENOBIOTIC_METABOLISM | 0.072259 | 0.97 | -0.044517 | 0.792079 | 1.000000 | 0.990099 |
| HALLMARK_OXIDATIVE_PHOSPHORYLATION | 0.039787 | 1.00 | -0.164421 | 0.009901 | 1.000000 | 0.165017 |
| HALLMARK_GLYCOLYSIS | 0.096515 | 0.72 | 0.081126 | 0.495050 | 1.000000 | 0.990099 |
| HALLMARK_P53_PATHWAY | 0.184979 | 0.21 | -0.104431 | 0.227723 | 0.850000 | 0.990099 |
| HALLMARK_HEME_METABOLISM | 0.054095 | 1.00 | -0.016380 | 0.940594 | 1.000000 | 0.990099 |
| HALLMARK_ALLOGRAFT_REJECTION | 0.212133 | 0.10 | -0.025685 | 0.891089 | 0.833333 | 0.990099 |
| HALLMARK_KRAS_SIGNALING_UP | 0.191552 | 0.19 | -0.082999 | 0.495050 | 0.850000 | 0.990099 |
| HALLMARK_KRAS_SIGNALING_DN | 0.158874 | 0.79 | 0.038865 | 0.584158 | 1.000000 | 0.990099 |
active pathways¶
[13]:
roma.adata.uns['ROMA_active_modules'].sort_values(by='Median Exp')
[13]:
| L1 | ppv L1 | Median Exp | ppv Med Exp | q L1 | q Med Exp | |
|---|---|---|---|---|---|---|
| HALLMARK_ANGIOGENESIS | 0.682129 | 0.0 | -0.092545 | 0.089109 | 0.0 | 0.636492 |
visualize results¶
[14]:
sc.pp.pca(roma.adata)
sc.pp.neighbors(roma.adata)
sc.tl.umap(roma.adata)
[15]:
adata.obs
[15]:
| celltype | n_genes | |
|---|---|---|
| AAACATACAACCAC-1 | CD4 T | 781 |
| AAACATTGAGCTAC-1 | B | 1352 |
| AAACATTGATCAGC-1 | CD4 T | 1131 |
| AAACCGTGCTTCCG-1 | CD14 Monocytes | 960 |
| AAACCGTGTATGCG-1 | NK | 522 |
| ... | ... | ... |
| TTTCGAACTCTCAT-1 | CD14 Monocytes | 1155 |
| TTTCTACTGAGGCA-1 | B | 1227 |
| TTTCTACTTCCTCG-1 | B | 622 |
| TTTGCATGAGAGGC-1 | B | 454 |
| TTTGCATGCCTCAC-1 | CD4 T | 724 |
2700 rows × 2 columns
[16]:
sc.pl.pca(adata, color=['celltype',
],
frameon=False,
ncols=2,
wspace=0.4)
[17]:
sc.pl.umap(adata, color=['celltype',
],
frameon=False,
ncols=2,
wspace=0.4)
[18]:
geneset_name = 'HALLMARK_INFLAMMATORY_RESPONSE'
roma.adata.obs[f'{geneset_name}_projections_1'] = roma.adata.uns['ROMA'][f'{geneset_name}'].svd.components_[0]
roma.adata.obs[f'{geneset_name}_projections_1_reverse'] = -1 * roma.adata.uns['ROMA'][f'{geneset_name}'].svd.components_[0]
GeneScores = pd.DataFrame(roma.adata.uns['ROMA'][f'{geneset_name}'].projections_1,
index=roma.adata.uns['ROMA'][f'{geneset_name}'].subsetlist,
columns=[f'{geneset_name}'])
sc.pl.pca(adata, color=['celltype',
f'{geneset_name}_projections_1'],
frameon=False,
ncols=2,
wspace=0.4)
sc.pl.umap(adata, color=['celltype',
f'{geneset_name}_projections_1'],
frameon=False,
ncols=2,
wspace=0.4)
[19]:
GeneScores
[19]:
| HALLMARK_INFLAMMATORY_RESPONSE | |
|---|---|
| CXCL10 | -0.170376 |
| CCL2 | -0.171694 |
| CCL5 | 45.372082 |
| FPR1 | -3.665694 |
| CCL20 | 0.093291 |
| ... | ... |
| STAB1 | -0.306701 |
| IRF1 | 1.357086 |
| ICAM4 | -0.329905 |
| P2RX4 | -0.153743 |
| ABI1 | 1.406667 |
149 rows × 1 columns
[20]:
# optional: to load pathway-genes dictionary
genesets = roma.read_gmt_to_dict(roma.gmt)
save and load results¶
[21]:
save_dir = 'results'
roma.save_active_modules_results(path=save_dir)
[22]:
del roma.adata
[23]:
#roma.adata
[24]:
load_dir = 'results'
roma.load_active_modules_results(path=save_dir)
[25]:
roma.adata
[25]:
AnnData object with n_obs × n_vars = 2700 × 13714
obs: 'celltype', 'n_genes', 'HALLMARK_INFLAMMATORY_RESPONSE_projections_1', 'HALLMARK_INFLAMMATORY_RESPONSE_projections_1_reverse'
var: 'gene_ids', 'n_cells'
uns: 'ROMA_active_modules', 'ROMA_stats', 'celltype_colors', 'log1p', 'neighbors', 'pca', 'umap', 'ROMA'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'connectivities', 'distances'
top contributing genes¶
[26]:
GeneScores.sort_values(by=geneset_name)
[26]:
| HALLMARK_INFLAMMATORY_RESPONSE | |
|---|---|
| LY6E | -9.469896 |
| TIMP1 | -8.930674 |
| CD14 | -7.395456 |
| NFKBIA | -5.897358 |
| RHOG | -4.730138 |
| ... | ... |
| CD69 | 3.288985 |
| IL7R | 6.165412 |
| IFITM1 | 6.817437 |
| LCK | 8.328794 |
| CCL5 | 45.372082 |
149 rows × 1 columns
[27]:
# number of top and bottom genes
n_top = 10
short_name = geneset_name.split('_')[1]
top_n = GeneScores.sort_values(by=f'{geneset_name}', ascending=False).head(n_top).index.tolist()
bottom_n = GeneScores.sort_values(by=f'{geneset_name}', ascending=False).tail(n_top).index.tolist()
top_mean_exp = roma.adata[:, top_n].X.mean(axis=1)
bottom_mean_exp = roma.adata[:, bottom_n].X.mean(axis=1)
roma.adata.obs[f'top_{n_top}_{short_name}_Mean_Exp'] = top_mean_exp
roma.adata.obs[f'bottom_{n_top}_{short_name}_Mean_Exp'] = bottom_mean_exp
sc.pl.pca(roma.adata, color=['celltype',
f'{geneset_name}_projections_1',
f'top_{n_top}_{short_name}_Mean_Exp',
f'bottom_{n_top}_{short_name}_Mean_Exp'],
frameon=False,
ncols=2,
wspace=0.4)
sc.pl.umap(roma.adata, color=['celltype',
f'{geneset_name}_projections_1',
f'top_{n_top}_{short_name}_Mean_Exp',
f'bottom_{n_top}_{short_name}_Mean_Exp'],
frameon=False,
ncols=2,
wspace=0.4)
end¶
[ ]: