Getting started

[7]:
import pyroma
roma = pyroma.ROMA()
roma
[7]:
scROMA
[ ]:
import os
import zipfile
import io
import numpy as np
import pandas as pd
import scanpy as sc
import scipy.io
from glob import glob
[ ]:
adata = pyroma.datasets.pbmc3k()
adata

standard preprocessing

[6]:
sc.pp.filter_cells(adata, min_genes=100)
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
[7]:
adata
[7]:
AnnData object with n_obs × n_vars = 2700 × 13714
    obs: 'celltype', 'n_genes'
    var: 'gene_ids', 'n_cells'
    uns: 'log1p'

run pyroma

In order to load your custom genesets, or any publicly available .gmt file indicate the absolute path for roma.gmt

[ ]:
hallmarks_gmt_path = pyroma.genesets.use_hallmarks()
[ ]:
import time

roma.gmt = hallmarks_gmt_path

roma.adata = adata
roma.pc_sign_mode = 'UseMeanExpressionAllWeights'
pathways_to_check = 'all' #['HALLMARK_NOTCH_SIGNALING', 'HALLMARK_PANCREAS_BETA_CELLS', 'HALLMARK_TGF_BETA_SIGNALING' ]

iters = 100

start = time.time()
roma.compute(pathways_to_check,
             parallel=True,
             loocv_on=True,
            iters=iters,
            verbose=0)
end = time.time()

minutes, seconds = divmod(end - start, 60)
print(f"CPU Parallel {iters} iterations for shape {adata.shape}, running time (min): " + "{:0>2}:{:05.2f}".format(int(minutes),seconds))

Processing gene set: HALLMARK_NOTCH_SIGNALING | len of subsetlist:  25 
Processing gene set: HALLMARK_HEDGEHOG_SIGNALING | len of subsetlist:  17 
Processing gene set: HALLMARK_ANGIOGENESIS | len of subsetlist:  15 
Processing gene set: HALLMARK_PANCREAS_BETA_CELLS | len of subsetlist:  14 
Processing gene set: HALLMARK_WNT_BETA_CATENIN_SIGNALING | len of subsetlist:  33 
Processing gene set: HALLMARK_APICAL_SURFACE | len of subsetlist:  26 
Processing gene set: HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY | len of subsetlist:  47 
Processing gene set: HALLMARK_TGF_BETA_SIGNALING | len of subsetlist:  47 
Processing gene set: HALLMARK_MYC_TARGETS_V2 | len of subsetlist:  57 
Processing gene set: HALLMARK_CHOLESTEROL_HOMEOSTASIS | len of subsetlist:  61 
Processing gene set: HALLMARK_IL6_JAK_STAT3_SIGNALING | len of subsetlist:  71 
Processing gene set: HALLMARK_PROTEIN_SECRETION | len of subsetlist:  88 
Processing gene set: HALLMARK_INTERFERON_ALPHA_RESPONSE | len of subsetlist:  93 
Processing gene set: HALLMARK_ANDROGEN_RESPONSE | len of subsetlist:  82 
Processing gene set: HALLMARK_PEROXISOME | len of subsetlist:  86 
Processing gene set: HALLMARK_PI3K_AKT_MTOR_SIGNALING | len of subsetlist:  91 
Processing gene set: HALLMARK_BILE_ACID_METABOLISM | len of subsetlist:  73 
Processing gene set: HALLMARK_UNFOLDED_PROTEIN_RESPONSE | len of subsetlist:  102 
Processing gene set: HALLMARK_SPERMATOGENESIS | len of subsetlist:  71 
Processing gene set: HALLMARK_COAGULATION | len of subsetlist:  75 
Processing gene set: HALLMARK_UV_RESPONSE_DN | len of subsetlist:  100 
Processing gene set: HALLMARK_DNA_REPAIR | len of subsetlist:  142 
Processing gene set: HALLMARK_FATTY_ACID_METABOLISM | len of subsetlist:  127 
Processing gene set: HALLMARK_UV_RESPONSE_UP | len of subsetlist:  124 
Processing gene set: HALLMARK_APOPTOSIS | len of subsetlist:  142 
Processing gene set: HALLMARK_MITOTIC_SPINDLE | len of subsetlist:  179 
Processing gene set: HALLMARK_IL2_STAT5_SIGNALING | len of subsetlist:  164 
Processing gene set: HALLMARK_TNFA_SIGNALING_VIA_NFKB | len of subsetlist:  172 
Processing gene set: HALLMARK_HYPOXIA | len of subsetlist:  138 
Processing gene set: HALLMARK_G2M_CHECKPOINT | len of subsetlist:  174 
Processing gene set: HALLMARK_ADIPOGENESIS | len of subsetlist:  160 
Processing gene set: HALLMARK_ESTROGEN_RESPONSE_EARLY | len of subsetlist:  126 
Processing gene set: HALLMARK_ESTROGEN_RESPONSE_LATE | len of subsetlist:  125 
Processing gene set: HALLMARK_MYOGENESIS | len of subsetlist:  112 
Processing gene set: HALLMARK_INTERFERON_GAMMA_RESPONSE | len of subsetlist:  187 
Processing gene set: HALLMARK_APICAL_JUNCTION | len of subsetlist:  119 
Processing gene set: HALLMARK_COMPLEMENT | len of subsetlist:  159 
Processing gene set: HALLMARK_MTORC1_SIGNALING | len of subsetlist:  187 
Processing gene set: HALLMARK_E2F_TARGETS | len of subsetlist:  181 
Processing gene set: HALLMARK_MYC_TARGETS_V1 | len of subsetlist:  192 
Processing gene set: HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION | len of subsetlist:  93 
Processing gene set: HALLMARK_INFLAMMATORY_RESPONSE | len of subsetlist:  149 
Processing gene set: HALLMARK_XENOBIOTIC_METABOLISM | len of subsetlist:  131 
Processing gene set: HALLMARK_OXIDATIVE_PHOSPHORYLATION | len of subsetlist:  180 
Processing gene set: HALLMARK_GLYCOLYSIS | len of subsetlist:  145 
Processing gene set: HALLMARK_P53_PATHWAY | len of subsetlist:  160 
Processing gene set: HALLMARK_HEME_METABOLISM | len of subsetlist:  154 
Processing gene set: HALLMARK_ALLOGRAFT_REJECTION | len of subsetlist:  167 
Processing gene set: HALLMARK_KRAS_SIGNALING_UP | len of subsetlist:  125 
Processing gene set: HALLMARK_KRAS_SIGNALING_DN | len of subsetlist:  66 
  Finished : CPU Parallel 100 iterations for shape (2700, 13714), running time (min): 01:00.46
[10]:
roma
[10]:
scROMA: module activities are computed
[11]:
roma.adata
[11]:
AnnData object with n_obs × n_vars = 2700 × 13714
    obs: 'celltype', 'n_genes'
    var: 'gene_ids', 'n_cells'
    uns: 'log1p', 'ROMA', 'ROMA_stats', 'ROMA_active_modules'

all the pyroma statistics

[12]:
roma.adata.uns['ROMA_stats']
[12]:
L1 ppv L1 Median Exp ppv Med Exp q L1 q Med Exp
HALLMARK_NOTCH_SIGNALING 0.308422 0.38 -0.046337 0.356436 0.850000 0.990099
HALLMARK_HEDGEHOG_SIGNALING 0.436227 0.09 -0.043306 0.405941 0.833333 0.990099
HALLMARK_ANGIOGENESIS 0.682129 0.00 -0.092545 0.089109 0.000000 0.636492
HALLMARK_PANCREAS_BETA_CELLS 0.329636 0.34 -0.002735 0.970297 0.850000 0.990099
HALLMARK_WNT_BETA_CATENIN_SIGNALING 0.168701 0.91 0.027318 0.574257 1.000000 0.990099
HALLMARK_APICAL_SURFACE 0.273708 0.50 0.015345 0.693069 0.850000 0.990099
HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY 0.295123 0.16 0.010931 0.891089 0.850000 0.990099
HALLMARK_TGF_BETA_SIGNALING 0.234388 0.45 -0.024408 0.722772 0.850000 0.990099
HALLMARK_MYC_TARGETS_V2 0.190350 0.67 0.193093 0.009901 1.000000 0.165017
HALLMARK_CHOLESTEROL_HOMEOSTASIS 0.223410 0.51 -0.100565 0.089109 0.850000 0.636492
HALLMARK_IL6_JAK_STAT3_SIGNALING 0.241695 0.17 -0.072523 0.316832 0.850000 0.990099
HALLMARK_PROTEIN_SECRETION 0.072113 1.00 -0.062006 0.405941 1.000000 0.990099
HALLMARK_INTERFERON_ALPHA_RESPONSE 0.149181 0.46 0.140216 0.019802 0.850000 0.247525
HALLMARK_ANDROGEN_RESPONSE 0.188237 0.43 -0.077837 0.277228 0.850000 0.990099
HALLMARK_PEROXISOME 0.091941 0.98 -0.026166 0.772277 1.000000 0.990099
HALLMARK_PI3K_AKT_MTOR_SIGNALING 0.119006 0.85 0.053214 0.445545 1.000000 0.990099
HALLMARK_BILE_ACID_METABOLISM 0.134706 0.81 -0.041943 0.623762 1.000000 0.990099
HALLMARK_UNFOLDED_PROTEIN_RESPONSE 0.096724 0.96 -0.070962 0.435644 1.000000 0.990099
HALLMARK_SPERMATOGENESIS 0.204142 0.31 -0.050981 0.485149 0.850000 0.990099
HALLMARK_COAGULATION 0.185626 0.43 -0.015626 0.871287 0.850000 0.990099
HALLMARK_UV_RESPONSE_DN 0.189827 0.18 -0.044129 0.752475 0.850000 0.990099
HALLMARK_DNA_REPAIR 0.041673 1.00 -0.028492 0.881188 1.000000 0.990099
HALLMARK_FATTY_ACID_METABOLISM 0.153207 0.30 -0.055761 0.742574 0.850000 0.990099
HALLMARK_UV_RESPONSE_UP 0.123632 0.47 0.051468 0.782178 0.850000 0.990099
HALLMARK_APOPTOSIS 0.148120 0.33 -0.064821 0.653465 0.850000 0.990099
HALLMARK_MITOTIC_SPINDLE 0.059667 0.98 -0.047615 0.801980 1.000000 0.990099
HALLMARK_IL2_STAT5_SIGNALING 0.129669 0.34 0.052441 0.782178 0.850000 0.990099
HALLMARK_TNFA_SIGNALING_VIA_NFKB 0.109175 0.46 -0.109271 0.257426 0.850000 0.990099
HALLMARK_HYPOXIA 0.150902 0.32 -0.105616 0.217822 0.850000 0.990099
HALLMARK_G2M_CHECKPOINT 0.073522 0.90 0.004057 0.980198 1.000000 0.990099
HALLMARK_ADIPOGENESIS 0.059627 0.99 0.040987 0.831683 1.000000 0.990099
HALLMARK_ESTROGEN_RESPONSE_EARLY 0.165805 0.25 0.006120 0.990099 0.850000 0.990099
HALLMARK_ESTROGEN_RESPONSE_LATE 0.262778 0.06 -0.074091 0.544554 0.833333 0.990099
HALLMARK_MYOGENESIS 0.130965 0.63 0.044766 0.742574 1.000000 0.990099
HALLMARK_INTERFERON_GAMMA_RESPONSE 0.141345 0.30 0.137520 0.039604 0.850000 0.396040
HALLMARK_APICAL_JUNCTION 0.160732 0.39 -0.106446 0.267327 0.850000 0.990099
HALLMARK_COMPLEMENT 0.251886 0.08 -0.015366 0.950495 0.833333 0.990099
HALLMARK_MTORC1_SIGNALING 0.080113 0.84 -0.088205 0.465347 1.000000 0.990099
HALLMARK_E2F_TARGETS 0.052116 0.98 0.017490 0.920792 1.000000 0.990099
HALLMARK_MYC_TARGETS_V1 0.083975 0.80 -0.264613 0.009901 1.000000 0.165017
HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION 0.268412 0.10 -0.062662 0.514851 0.833333 0.990099
HALLMARK_INFLAMMATORY_RESPONSE 0.116391 0.49 -0.059221 0.693069 0.850000 0.990099
HALLMARK_XENOBIOTIC_METABOLISM 0.072259 0.97 -0.044517 0.792079 1.000000 0.990099
HALLMARK_OXIDATIVE_PHOSPHORYLATION 0.039787 1.00 -0.164421 0.009901 1.000000 0.165017
HALLMARK_GLYCOLYSIS 0.096515 0.72 0.081126 0.495050 1.000000 0.990099
HALLMARK_P53_PATHWAY 0.184979 0.21 -0.104431 0.227723 0.850000 0.990099
HALLMARK_HEME_METABOLISM 0.054095 1.00 -0.016380 0.940594 1.000000 0.990099
HALLMARK_ALLOGRAFT_REJECTION 0.212133 0.10 -0.025685 0.891089 0.833333 0.990099
HALLMARK_KRAS_SIGNALING_UP 0.191552 0.19 -0.082999 0.495050 0.850000 0.990099
HALLMARK_KRAS_SIGNALING_DN 0.158874 0.79 0.038865 0.584158 1.000000 0.990099

active pathways

[13]:
roma.adata.uns['ROMA_active_modules'].sort_values(by='Median Exp')
[13]:
L1 ppv L1 Median Exp ppv Med Exp q L1 q Med Exp
HALLMARK_ANGIOGENESIS 0.682129 0.0 -0.092545 0.089109 0.0 0.636492

visualize results

[14]:
sc.pp.pca(roma.adata)
sc.pp.neighbors(roma.adata)
sc.tl.umap(roma.adata)
[15]:
adata.obs
[15]:
celltype n_genes
AAACATACAACCAC-1 CD4 T 781
AAACATTGAGCTAC-1 B 1352
AAACATTGATCAGC-1 CD4 T 1131
AAACCGTGCTTCCG-1 CD14 Monocytes 960
AAACCGTGTATGCG-1 NK 522
... ... ...
TTTCGAACTCTCAT-1 CD14 Monocytes 1155
TTTCTACTGAGGCA-1 B 1227
TTTCTACTTCCTCG-1 B 622
TTTGCATGAGAGGC-1 B 454
TTTGCATGCCTCAC-1 CD4 T 724

2700 rows × 2 columns

[16]:
sc.pl.pca(adata, color=['celltype',
                                ],
                                frameon=False,
                                ncols=2,
                                wspace=0.4)
../_images/tutorials_getting_started_20_0.png
[17]:
sc.pl.umap(adata, color=['celltype',
                                ],
                                frameon=False,
                                ncols=2,
                                wspace=0.4)
../_images/tutorials_getting_started_21_0.png
[18]:
geneset_name = 'HALLMARK_INFLAMMATORY_RESPONSE'

roma.adata.obs[f'{geneset_name}_projections_1'] = roma.adata.uns['ROMA'][f'{geneset_name}'].svd.components_[0]
roma.adata.obs[f'{geneset_name}_projections_1_reverse'] = -1 * roma.adata.uns['ROMA'][f'{geneset_name}'].svd.components_[0]

GeneScores = pd.DataFrame(roma.adata.uns['ROMA'][f'{geneset_name}'].projections_1,
                index=roma.adata.uns['ROMA'][f'{geneset_name}'].subsetlist,
                columns=[f'{geneset_name}'])

sc.pl.pca(adata, color=['celltype',
                                f'{geneset_name}_projections_1'],
                                frameon=False,
                                ncols=2,
                                wspace=0.4)

sc.pl.umap(adata, color=['celltype',
                                f'{geneset_name}_projections_1'],
                                frameon=False,
                                ncols=2,
                                wspace=0.4)
../_images/tutorials_getting_started_22_0.png
../_images/tutorials_getting_started_22_1.png
[19]:
GeneScores
[19]:
HALLMARK_INFLAMMATORY_RESPONSE
CXCL10 -0.170376
CCL2 -0.171694
CCL5 45.372082
FPR1 -3.665694
CCL20 0.093291
... ...
STAB1 -0.306701
IRF1 1.357086
ICAM4 -0.329905
P2RX4 -0.153743
ABI1 1.406667

149 rows × 1 columns

[20]:
# optional: to load pathway-genes dictionary
genesets = roma.read_gmt_to_dict(roma.gmt)

save and load results

[21]:
save_dir = 'results'

roma.save_active_modules_results(path=save_dir)
[22]:
del roma.adata
[23]:
#roma.adata
[24]:
load_dir = 'results'

roma.load_active_modules_results(path=save_dir)
[25]:
roma.adata
[25]:
AnnData object with n_obs × n_vars = 2700 × 13714
    obs: 'celltype', 'n_genes', 'HALLMARK_INFLAMMATORY_RESPONSE_projections_1', 'HALLMARK_INFLAMMATORY_RESPONSE_projections_1_reverse'
    var: 'gene_ids', 'n_cells'
    uns: 'ROMA_active_modules', 'ROMA_stats', 'celltype_colors', 'log1p', 'neighbors', 'pca', 'umap', 'ROMA'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

top contributing genes

[26]:
GeneScores.sort_values(by=geneset_name)
[26]:
HALLMARK_INFLAMMATORY_RESPONSE
LY6E -9.469896
TIMP1 -8.930674
CD14 -7.395456
NFKBIA -5.897358
RHOG -4.730138
... ...
CD69 3.288985
IL7R 6.165412
IFITM1 6.817437
LCK 8.328794
CCL5 45.372082

149 rows × 1 columns

[27]:
# number of top and bottom genes
n_top = 10
short_name = geneset_name.split('_')[1]

top_n = GeneScores.sort_values(by=f'{geneset_name}', ascending=False).head(n_top).index.tolist()
bottom_n = GeneScores.sort_values(by=f'{geneset_name}', ascending=False).tail(n_top).index.tolist()

top_mean_exp = roma.adata[:, top_n].X.mean(axis=1)
bottom_mean_exp = roma.adata[:, bottom_n].X.mean(axis=1)

roma.adata.obs[f'top_{n_top}_{short_name}_Mean_Exp'] = top_mean_exp
roma.adata.obs[f'bottom_{n_top}_{short_name}_Mean_Exp'] = bottom_mean_exp

sc.pl.pca(roma.adata, color=['celltype',
                                f'{geneset_name}_projections_1',
                                f'top_{n_top}_{short_name}_Mean_Exp',
                                f'bottom_{n_top}_{short_name}_Mean_Exp'],
                                frameon=False,
                                ncols=2,
                                wspace=0.4)

sc.pl.umap(roma.adata, color=['celltype',
                                f'{geneset_name}_projections_1',
                                f'top_{n_top}_{short_name}_Mean_Exp',
                                f'bottom_{n_top}_{short_name}_Mean_Exp'],
                                frameon=False,
                                ncols=2,
                                wspace=0.4)

../_images/tutorials_getting_started_33_0.png
../_images/tutorials_getting_started_33_1.png

end

[ ]: