diff --git a/CHANGELOG b/CHANGELOG index aeda7ed..323b34c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -21,5 +21,8 @@ Version 0.3.0 * differential_analysis allows to normalize the data * differential_analysis allows to input more than one dataset -Version 0.3.1 -* The orientation of the cell tissue image is correct now \ No newline at end of file +Version 0.3.2 +* The orientation of the cell tissue image is correct now + +Version 0.3.3 +* Fixed a bug in the visualization module \ No newline at end of file diff --git a/README.md b/README.md index e8b9c7e..007f35a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,22 @@ # Spatial Transcriptomics Analysis -Different tools for visualization, conversion and analysis of Spatial Transcriptomics data +Different tools for visualization, data processing, conversion and analysis (supervised and unsupervised learning, differential expression analysis, etc.) of Spatial Transcriptomics data (can also be used for single cell data). + +The package is compatible with the output format of the data generated with the ST Pipeline (https://github.com/SpatialTranscriptomicsResearch/st_pipeline) and gives full support for plotting the data onto the tissue images, but it is also compatible with any single cell dataset where the data is stored as a matrix of counts (genes as columns and spots/cells as rows). + +This package makes use of the following tools: + +t-SNE +https://github.com/lvdmaaten/bhtsne + +Scran +https://github.com/MarioniLab/Deconvolution2016 + +DESeq2 +http://bioconductor.org/packages/devel/bioc/html/DESeq2.html + +EdgeR +https://bioconductor.org/packages/release/bioc/html/edgeR.html ### License MIT License, see LICENSE file. 
diff --git a/scripts/differential_analysis.py b/scripts/differential_analysis.py index 551f5ae..92c4aeb 100644 --- a/scripts/differential_analysis.py +++ b/scripts/differential_analysis.py @@ -182,7 +182,7 @@ def main(counts_table_files, data_classes, a.scatter(x_points, y_points, c=colors, edgecolor="none") for x,y,text in izip(x_points_conf,y_points_conf,names_conf): a.text(x,y,text,size="x-small") - fig.savefig(os.path.join(outdir, "volcano_dataset{}_region{}_vs_dataset{}_region{}.png" + fig.savefig(os.path.join(outdir, "volcano_dataset{}_region{}_vs_dataset{}_region{}.pdf" .format(dataset_a, region_a, dataset_b, region_b)), dpi=300) if __name__ == '__main__': diff --git a/scripts/st_data_plotter.py b/scripts/st_data_plotter.py index 8a05c54..3417df4 100755 --- a/scripts/st_data_plotter.py +++ b/scripts/st_data_plotter.py @@ -46,14 +46,15 @@ def main(input_data, normalization, filter_genes, outfile, - use_log_scale): + use_log_scale, + title): if not os.path.isfile(input_data): sys.stderr.write("Error, input file/s not present or invalid format\n") sys.exit(1) if not outfile: - outfile = "data_plot.png" + outfile = "data_plot.pdf" # Extract data frame and normalize it if needed (genes as columns) counts_table = pd.read_table(input_data, sep="\t", header=0, index_col=0) @@ -113,7 +114,7 @@ def main(input_data, output="{}_{}".format("highlight",outfile), alignment=alignment, cmap=None, - title='ST Data scatter highlight', + title=title, xlabel='X', ylabel='Y', image=image, @@ -130,7 +131,7 @@ def main(input_data, output=outfile, alignment=alignment, cmap=plt.get_cmap("YlOrBr"), - title='ST Data scatter', + title=title, xlabel='X', ylabel='Y', image=image, @@ -179,10 +180,12 @@ def main(input_data, "REL = Each gene count divided by the total count of its spot\n" \ "(default: %(default)s)") parser.add_argument("--show-genes", help="Regular expression for gene symbols to be shown\n" \ - "If given only the genes matching the reg-exp will be shown", + "If given only 
the genes matching the reg-exp will be shown.\n" \ + "Can be given several times.", default=None, type=str, action='append') + parser.add_argument("--title", help="The title to show in the plot.", default="ST Data scatter", type=str) parser.add_argument("--outfile", type=str, help="Name of the output file") parser.add_argument("--use-log-scale", action="store_true", default=False, help="Use log2(counts + 1) values") args = parser.parse_args() @@ -198,4 +201,5 @@ def main(input_data, args.normalization, args.show_genes, args.outfile, - args.use_log_scale) + args.use_log_scale, + args.title) diff --git a/scripts/supervised.py b/scripts/supervised.py index 30fa9e9..05dbc2d 100644 --- a/scripts/supervised.py +++ b/scripts/supervised.py @@ -211,7 +211,7 @@ def main(train_data, scatter_plot(x_points=x_points, y_points=y_points, colors=merged_prob_colors, - output=os.path.join(outdir,"predicted_classes_tissue_probability.png"), + output=os.path.join(outdir,"predicted_classes_tissue_probability.pdf"), alignment=alignment_matrix, cmap=cm, title='Computed classes tissue (probability)', @@ -226,7 +226,7 @@ def main(train_data, scatter_plot(x_points=x_points, y_points=y_points, colors=[int(c) for c in predicted_class], - output=os.path.join(outdir,"predicted_classes_tissue.png"), + output=os.path.join(outdir,"predicted_classes_tissue.pdf"), alignment=alignment_matrix, cmap=None, title='Computed classes tissue', diff --git a/scripts/unsupervised.py b/scripts/unsupervised.py index af844c3..3165040 100644 --- a/scripts/unsupervised.py +++ b/scripts/unsupervised.py @@ -228,7 +228,7 @@ def main(counts_table_files, y_points=reduced_data[:,1], z_points=reduced_data[:,2], colors=labels, - output=os.path.join(outdir,"computed_classes.png"), + output=os.path.join(outdir,"computed_classes.pdf"), title='Computed classes', alpha=1.0, size=20) @@ -236,7 +236,7 @@ def main(counts_table_files, scatter_plot(x_points=reduced_data[:,0], y_points=reduced_data[:,1], colors=labels, - 
output=os.path.join(outdir,"computed_classes.png"), + output=os.path.join(outdir,"computed_classes.pdf"), title='Computed classes', alpha=1.0, size=20) @@ -273,7 +273,7 @@ def main(counts_table_files, scatter_plot(x_points=x_points, y_points=y_points, colors=colors_classes, - output=os.path.join(outdir,"computed_classes_tissue_{}.png".format(i)), + output=os.path.join(outdir,"computed_classes_tissue_{}.pdf".format(i)), alignment=alignment_matrix, cmap=None, title='Computed classes tissue', @@ -285,7 +285,7 @@ def main(counts_table_files, scatter_plot(x_points=x_points, y_points=y_points, colors=colors_dimensionality, - output=os.path.join(outdir,"dimensionality_color_tissue_{}.png".format(i)), + output=os.path.join(outdir,"dimensionality_color_tissue_{}.pdf".format(i)), alignment=alignment_matrix, cmap=plt.get_cmap("hsv"), title='Dimensionality color tissue', diff --git a/setup.py b/setup.py index 8739fe9..bf231ee 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name = 'stanalysis', - version = "0.3.1", + version = "0.3.3", description = __doc__.split("\n", 1)[0], long_description = long_description, keywords = 'rna-seq analysis machine_learning spatial transcriptomics toolkit', diff --git a/stanalysis/alignment.py b/stanalysis/alignment.py index ec8ba58..0d090b2 100644 --- a/stanalysis/alignment.py +++ b/stanalysis/alignment.py @@ -16,16 +16,7 @@ def parseAlignmentMatrix(alignment_file): :param alignment_file: a file containing the 9 elements of a 3x3 matrix :return: a 3x3 matrix (default identify if error happens) """ - alignment_matrix = np.zeros((3,3), dtype=np.float) - alignment_matrix[0,0] = 1 - alignment_matrix[0,1] = 0 - alignment_matrix[0,2] = 0 - alignment_matrix[1,0] = 0 - alignment_matrix[1,1] = 1 - alignment_matrix[1,2] = 0 - alignment_matrix[2,0] = 0 - alignment_matrix[2,1] = 0 - alignment_matrix[2,2] = 1 + alignment_matrix = np.identity(3) if alignment_file is None or not os.path.isfile(alignment_file): return alignment_matrix with 
open(alignment_file, "r") as filehandler: diff --git a/stanalysis/visualization.py b/stanalysis/visualization.py index dd5aad0..12061c9 100644 --- a/stanalysis/visualization.py +++ b/stanalysis/visualization.py @@ -40,8 +40,7 @@ def histogram(x_points, output, title="Histogram", xlabel="X", color="blue"): # Tweak spacing to prevent clipping of ylabel plt.subplots_adjust(left=0.15) - fig.set_size_inches(16, 16) - fig.savefig(output, dpi=300) + fig.savefig(os.path.splitext(os.path.basename(output))[0], format='pdf', dpi=300) def scatter_plot3d(x_points, y_points, z_points, output=None, colors=None, cmap=None, title='Scatter', xlabel='X', @@ -91,7 +90,7 @@ def scatter_plot3d(x_points, y_points, z_points, output=None, a.set_title(title, size=10) # Save or show the plot if output is not None: - fig.savefig(output, dpi=300) + fig.savefig(os.path.splitext(os.path.basename(output))[0], format='pdf', dpi=300) else: fig.show() @@ -131,7 +130,7 @@ def scatter_plot(x_points, y_points, output=None, colors=None, extent_size = [1,33,35,1] # If alignment is None we re-size the image to chip size (1,1,33,35) # Otherwise we keep the image intact and apply the 3x3 transformation - if alignment is not None: + if alignment is not None and not np.array_equal(alignment, np.identity(3)): base_trans = transforms.Affine2D(matrix = alignment) + base_trans extent_size = None # We convert the list of color int values to color labels @@ -163,6 +162,6 @@ def scatter_plot(x_points, y_points, output=None, colors=None, plt.colorbar(sc) # Save or show the plot if output is not None: - fig.savefig(output, dpi=300) + fig.savefig(os.path.splitext(os.path.basename(output))[0], format='pdf', dpi=300) else: fig.show() \ No newline at end of file