SpatialTranscriptomicsResearch · jfnavarro · Mar 13, 2017 · Mar 6, 2017 · Mar 6, 2017 · Mar 9, 2017
diff --git a/CHANGELOG b/CHANGELOG
@@ -21,5 +21,8 @@ Version 0.3.0
 * differential_analysis allows to normalize the data
 * differential_analysis allows to input more than one dataset
 
-Version 0.3.1
-* The orientation of the cell tissue image is correct now
+Version 0.3.2
+* The orientation of the cell tissue image is correct now
+
+Version 0.3.3
+* Fixed a bug in the visualization module
diff --git a/README.md b/README.md
@@ -1,6 +1,22 @@
 # Spatial Transcriptomics Analysis 
 
-Different tools for visualization, conversion and analysis of Spatial Transcriptomics data
+Different tools for visualization, data processing, conversion and analysis (supervised and unsupervised learning, differential expression analysis, etc.) of Spatial Transcriptomics data (can also be used for single-cell data).
+
+The package is compatible with the output format of the data generated with the ST Pipeline (https://github.com/SpatialTranscriptomicsResearch/st_pipeline) and gives full support for plotting the data onto the tissue images, but it is also compatible with any single-cell dataset where the data is stored as a matrix of counts (genes as columns and spots/cells as rows).
+
+This package makes use of the following tools:
+
+t-SNE
+https://github.com/lvdmaaten/bhtsne
+
+Scran
+https://github.com/MarioniLab/Deconvolution2016
+
+DESeq2
+http://bioconductor.org/packages/devel/bioc/html/DESeq2.html
+
+EdgeR
+https://bioconductor.org/packages/release/bioc/html/edgeR.html
 
 ### License
 MIT License, see LICENSE file.

diff --git a/scripts/differential_analysis.py b/scripts/differential_analysis.py
@@ -182,7 +182,7 @@ def main(counts_table_files, data_classes,
         a.scatter(x_points, y_points, c=colors, edgecolor="none")  
         for x,y,text in izip(x_points_conf,y_points_conf,names_conf):
             a.text(x,y,text,size="x-small")
-        fig.savefig(os.path.join(outdir, "volcano_dataset{}_region{}_vs_dataset{}_region{}.png"
+        fig.savefig(os.path.join(outdir, "volcano_dataset{}_region{}_vs_dataset{}_region{}.pdf"
                                  .format(dataset_a, region_a, dataset_b, region_b)), dpi=300)
 
 if __name__ == '__main__':

diff --git a/scripts/st_data_plotter.py b/scripts/st_data_plotter.py
@@ -46,14 +46,15 @@ def main(input_data,
          normalization,
          filter_genes,
          outfile,
-         use_log_scale):
+         use_log_scale,
+         title):
 
     if not os.path.isfile(input_data):
         sys.stderr.write("Error, input file/s not present or invalid format\n")
         sys.exit(1)
 
     if not outfile:
-        outfile = "data_plot.png"
+        outfile = "data_plot.pdf"
 
     # Extract data frame and normalize it if needed (genes as columns)
     counts_table = pd.read_table(input_data, sep="\t", header=0, index_col=0)
@@ -113,7 +114,7 @@ def main(input_data,
                      output="{}_{}".format("highlight",outfile),
                      alignment=alignment,
                      cmap=None,
-                     title='ST Data scatter highlight',
+                     title=title,
                      xlabel='X',
                      ylabel='Y',
                      image=image,
@@ -130,7 +131,7 @@ def main(input_data,
                  output=outfile,
                  alignment=alignment,
                  cmap=plt.get_cmap("YlOrBr"),
-                 title='ST Data scatter',
+                 title=title,
                  xlabel='X',
                  ylabel='Y',
                  image=image,
@@ -179,10 +180,12 @@ def main(input_data,
                         "REL = Each gene count divided by the total count of its spot\n" \
                         "(default: %(default)s)")
     parser.add_argument("--show-genes", help="Regular expression for gene symbols to be shown\n" \
-                        "If given only the genes matching the reg-exp will be shown",
+                        "If given only the genes matching the reg-exp will be shown.\n" \
+                        "Can be given several times.",
                         default=None,
                         type=str,
                         action='append')
+    parser.add_argument("--title", help="The title to show in the plot.", default="ST Data scatter", type=str)
     parser.add_argument("--outfile", type=str, help="Name of the output file")
     parser.add_argument("--use-log-scale", action="store_true", default=False, help="Use log2(counts + 1) values")
     args = parser.parse_args()
@@ -198,4 +201,5 @@ def main(input_data,
          args.normalization,
          args.show_genes,
          args.outfile,
-         args.use_log_scale)
+         args.use_log_scale,
+         args.title)
diff --git a/scripts/supervised.py b/scripts/supervised.py
@@ -211,7 +211,7 @@ def main(train_data,
     scatter_plot(x_points=x_points, 
                  y_points=y_points, 
                  colors=merged_prob_colors, 
-                 output=os.path.join(outdir,"predicted_classes_tissue_probability.png"), 
+                 output=os.path.join(outdir,"predicted_classes_tissue_probability.pdf"), 
                  alignment=alignment_matrix, 
                  cmap=cm, 
                  title='Computed classes tissue (probability)', 
@@ -226,7 +226,7 @@ def main(train_data,
     scatter_plot(x_points=x_points, 
                  y_points=y_points, 
                  colors=[int(c) for c in predicted_class], 
-                 output=os.path.join(outdir,"predicted_classes_tissue.png"), 
+                 output=os.path.join(outdir,"predicted_classes_tissue.pdf"), 
                  alignment=alignment_matrix, 
                  cmap=None, 
                  title='Computed classes tissue', 

diff --git a/scripts/unsupervised.py b/scripts/unsupervised.py
@@ -228,15 +228,15 @@ def main(counts_table_files,
                        y_points=reduced_data[:,1],
                        z_points=reduced_data[:,2],
                        colors=labels, 
-                       output=os.path.join(outdir,"computed_classes.png"), 
+                       output=os.path.join(outdir,"computed_classes.pdf"), 
                        title='Computed classes', 
                        alpha=1.0, 
                        size=20)
     else:
         scatter_plot(x_points=reduced_data[:,0], 
                      y_points=reduced_data[:,1],
                      colors=labels, 
-                     output=os.path.join(outdir,"computed_classes.png"), 
+                     output=os.path.join(outdir,"computed_classes.pdf"), 
                      title='Computed classes', 
                      alpha=1.0, 
                      size=20)          
@@ -273,7 +273,7 @@ def main(counts_table_files,
         scatter_plot(x_points=x_points, 
                      y_points=y_points,
                      colors=colors_classes,
-                     output=os.path.join(outdir,"computed_classes_tissue_{}.png".format(i)), 
+                     output=os.path.join(outdir,"computed_classes_tissue_{}.pdf".format(i)), 
                      alignment=alignment_matrix, 
                      cmap=None, 
                      title='Computed classes tissue', 
@@ -285,7 +285,7 @@ def main(counts_table_files,
         scatter_plot(x_points=x_points, 
                      y_points=y_points,
                      colors=colors_dimensionality, 
-                     output=os.path.join(outdir,"dimensionality_color_tissue_{}.png".format(i)), 
+                     output=os.path.join(outdir,"dimensionality_color_tissue_{}.pdf".format(i)), 
                      alignment=alignment_matrix, 
                      cmap=plt.get_cmap("hsv"), 
                      title='Dimensionality color tissue', 

diff --git a/setup.py b/setup.py
@@ -16,7 +16,7 @@
 
 setup(
   name = 'stanalysis',
-  version = "0.3.1",
+  version = "0.3.3",
   description = __doc__.split("\n", 1)[0],
   long_description = long_description,
   keywords = 'rna-seq analysis machine_learning spatial transcriptomics toolkit',

diff --git a/stanalysis/alignment.py b/stanalysis/alignment.py
@@ -16,16 +16,7 @@ def parseAlignmentMatrix(alignment_file):
     :param alignment_file: a file containing the 9 elements of a 3x3 matrix
     :return: a 3x3 matrix (default identify if error happens)
     """
-    alignment_matrix = np.zeros((3,3), dtype=np.float)
-    alignment_matrix[0,0] = 1
-    alignment_matrix[0,1] = 0
-    alignment_matrix[0,2] = 0
-    alignment_matrix[1,0] = 0
-    alignment_matrix[1,1] = 1
-    alignment_matrix[1,2] = 0
-    alignment_matrix[2,0] = 0
-    alignment_matrix[2,1] = 0
-    alignment_matrix[2,2] = 1
+    alignment_matrix = np.identity(3)
     if alignment_file is None or not os.path.isfile(alignment_file):
         return alignment_matrix
     with open(alignment_file, "r") as filehandler:

diff --git a/stanalysis/visualization.py b/stanalysis/visualization.py
@@ -40,8 +40,7 @@ def histogram(x_points, output, title="Histogram", xlabel="X", color="blue"):
 
     # Tweak spacing to prevent clipping of ylabel
     plt.subplots_adjust(left=0.15)
-    fig.set_size_inches(16, 16)
-    fig.savefig(output, dpi=300)
+    fig.savefig(os.path.splitext(os.path.basename(output))[0], format='pdf', dpi=300)
 
 def scatter_plot3d(x_points, y_points, z_points, output=None,
                    colors=None, cmap=None, title='Scatter', xlabel='X', 
@@ -91,7 +90,7 @@ def scatter_plot3d(x_points, y_points, z_points, output=None,
     a.set_title(title, size=10)
     # Save or show the plot
     if output is not None:
-        fig.savefig(output, dpi=300)
+        fig.savefig(os.path.splitext(os.path.basename(output))[0], format='pdf', dpi=300)
     else:
         fig.show()
 
@@ -131,7 +130,7 @@ def scatter_plot(x_points, y_points, output=None, colors=None,
     extent_size = [1,33,35,1]
     # If alignment is None we re-size the image to chip size (1,1,33,35)
     # Otherwise we keep the image intact and apply the 3x3 transformation
-    if alignment is not None:
+    if alignment is not None and not np.array_equal(alignment, np.identity(3)):
         base_trans = transforms.Affine2D(matrix = alignment) + base_trans
         extent_size = None
     # We convert the list of color int values to color labels
@@ -163,6 +162,6 @@ def scatter_plot(x_points, y_points, output=None, colors=None,
         plt.colorbar(sc)
     # Save or show the plot
     if output is not None:
-        fig.savefig(output, dpi=300)
+        fig.savefig(os.path.splitext(os.path.basename(output))[0], format='pdf', dpi=300)
     else:
         fig.show()