Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,8 @@ Version 0.4.6
* Added slice_regions_matrix.py script
* Optimized and improved differential_analysis.py
* Added compatibility with R 3.4 and rpy2 latest versions

Version 0.5.0
* Fixed bugs
* Added support for BiocParallel
* Added extract_spots.py script
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ The referred matrix format is the ST data format, a matrix of counts where spot
and the genes are column names. This matrix format (.TSV) is generated with the
[ST Pipeline](https://github.com/SpatialTranscriptomicsResearch/st_pipeline)

The scripts that allow you to pass the tissue HE image can optionally take a 3x3 alignment file.
The scripts that allow you to pass the tissue HE image can optionally take a 3x3 alignment file.
If the images are cropped to the exact array boundaries the alignment file is not needed
unless you want to plot the image in the original image size. If the image is un-cropped
then you need the alignment file to convert from spot coordinates to pixel coordinates.
Expand Down
21 changes: 17 additions & 4 deletions stanalysis/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import colors as mpcolors
from collections import Counter
import multiprocessing
import rpy2.robjects.packages as rpackages
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri, r, numpy2ri, globalenv
Expand All @@ -16,7 +17,9 @@ def computeNClusters(counts, min_size=20):
from the data using Scran::quickCluster"""
pandas2ri.activate()
r_counts = pandas2ri.py2ri(counts.transpose())
scran = RimportLibrary("scran")
scran = RimportLibrary("scran")
multicore = RimportLibrary("BiocParallel")
multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
as_matrix = r["as.matrix"]
clusters = scran.quickCluster(as_matrix(r_counts), min_size)
n_clust = len(set(clusters))
Expand All @@ -34,7 +37,8 @@ def deaDESeq2(counts, conds, comparisons, alpha, size_factors=None):
try:
pandas2ri.activate()
deseq2 = RimportLibrary("DESeq2")
r("suppressMessages(library(DESeq2))")
multicore = RimportLibrary("BiocParallel")
multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
# Create the R conditions and counts data
r_counts = pandas2ri.py2ri(counts)
cond = robjects.DataFrame({"conditions": robjects.StrVector(conds)})
Expand Down Expand Up @@ -74,6 +78,8 @@ def deaScranDESeq2(counts, conds, comparisons, alpha, scran_clusters=False):
pandas2ri.activate()
deseq2 = RimportLibrary("DESeq2")
scran = RimportLibrary("scran")
multicore = RimportLibrary("BiocParallel")
multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
as_matrix = r["as.matrix"]
# Create the R conditions and counts data
r_counts = pandas2ri.py2ri(counts)
Expand Down Expand Up @@ -102,7 +108,12 @@ def deaScranDESeq2(counts, conds, comparisons, alpha, scran_clusters=False):
# Perform the comparisons and store results in list
for A,B in comparisons:
result = r.results(dds, contrast=r.c("conditions", A, B), alpha=alpha)
result = pandas2ri.ri2py_dataframe(r['as.data.frame'](result))
result = r['as.data.frame'](result)
genes = r['rownames'](result)
result = pandas2ri.ri2py_dataframe(result)
# There seems to be a problem parsing the rownames from R to pandas
# so we do it manually
result.index = genes
results.append(result)
pandas2ri.deactivate()
except Exception as e:
Expand Down Expand Up @@ -142,7 +153,9 @@ def Rtsne(counts, dimensions, theta=0.5, dims=50, perplexity=30, max_iter=1000):
using the R package Rtsne"""
pandas2ri.activate()
r_counts = pandas2ri.py2ri(counts)
tsne = RimportLibrary("Rtsne")
tsne = RimportLibrary("Rtsne")
multicore = RimportLibrary("BiocParallel")
multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
as_matrix = r["as.matrix"]
tsne_out = tsne.Rtsne(as_matrix(counts),
dims=dimensions,
Expand Down
15 changes: 13 additions & 2 deletions stanalysis/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import pandas as pd
from collections import Counter
import multiprocessing
import rpy2.robjects.packages as rpackages
from rpy2.robjects import pandas2ri, r, numpy2ri
import rpy2.robjects as ro
Expand All @@ -29,6 +30,8 @@ def computeTMMFactors(counts):
pandas2ri.activate()
r_counts = pandas2ri.py2ri(counts)
edger = RimportLibrary("edgeR")
multicore = RimportLibrary("BiocParallel")
multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
as_matrix = r["as.matrix"]
dds = edger.calcNormFactors(as_matrix(r_counts), method="TMM")
pandas_sf = pandas2ri.ri2py(dds)
Expand All @@ -45,6 +48,8 @@ def computeRLEFactors(counts):
pandas2ri.activate()
r_counts = pandas2ri.py2ri(counts)
edger = RimportLibrary("edgeR")
multicore = RimportLibrary("BiocParallel")
multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
as_matrix = r["as.matrix"]
dds = edger.calcNormFactors(as_matrix(r_counts), method="RLE")
pandas_sf = pandas2ri.ri2py(dds)
Expand All @@ -64,6 +69,8 @@ def computeSumFactors(counts, scran_clusters=True):
pandas2ri.activate()
r_counts = pandas2ri.py2ri(counts)
scran = RimportLibrary("scran")
multicore = RimportLibrary("BiocParallel")
multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
as_matrix = r["as.matrix"]
if scran_clusters:
r_clusters = scran.quickCluster(as_matrix(r_counts), max(n_cells/10, 10))
Expand Down Expand Up @@ -117,8 +124,10 @@ def computeSizeFactors(counts):
"""
pandas2ri.activate()
r_counts = pandas2ri.py2ri(counts)
deseq = RimportLibrary("DESeq")
dds = deseq.estimateSizeFactorsForMatrix(r_counts)
deseq2 = RimportLibrary("DESeq2")
multicore = RimportLibrary("BiocParallel")
multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
dds = deseq2.estimateSizeFactorsForMatrix(r_counts)
pandas_sf = pandas2ri.ri2py(dds)
pandas2ri.deactivate()
return pandas_sf
Expand Down Expand Up @@ -146,6 +155,8 @@ def computeSizeFactorsLinear(counts):
pandas2ri.activate()
r_counts = pandas2ri.py2ri(counts)
deseq2 = RimportLibrary("DESeq2")
multicore = RimportLibrary("BiocParallel")
multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
vec = rpackages.importr('S4Vectors')
bio_generics = rpackages.importr("BiocGenerics")
cond = vec.DataFrame(condition=base.factor(base.c(base.colnames(r_counts))))
Expand Down