diff --git a/CHANGELOG b/CHANGELOG
index 436989b..caf4569 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -9,12 +9,12 @@ Version 0.3.0
 * Bug fixes
 * Optimized the normalization
 * Unsupervised allows to center size factors by mean
-* Unsupervised allows to computed adjuted log normalized counts
+* Unsupervised allows to computed adjusted log normalized counts
 * Unsupervised allows to compute the number of clusters automatically
 * Supervised allows to normalize the data
 * Supervised allows to input train/classes with different spots
   than in the train/test data and in different order
-* st_data_plotter allows to highlith selected spots
+* st_data_plotter allows to highlight selected spots
 * st_data_plotter only plots the spots where the gene is present
   when a gene reg-exp is given
 * st_data_plotter allows to normalize the data
@@ -35,4 +35,10 @@ Version 0.4.1
 * Fixed a bug in the noise filtering function
 
 Version 0.4.2
-* Added compatibility with Python 3
\ No newline at end of file
+* Added compatibility with Python 3
+
+Version 0.4.5
+* Added merge_replicates.py script
+* Added slice_regions_matrix.py script
+* Optimized and improved differential_analysis.py
+* Added compatibility with R 3.4 and rpy2 latest versions
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index 6a7c52d..6d6ab8e 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,5 +1,5 @@
 The MIT License (MIT)
-Copyright (c) 2016 Jose Fernandez Navarro. 
+Copyright (c) 2017 Jose Fernandez Navarro, KTH. 
 
 Permission is hereby granted, free of charge, to any person obtaining 
 a copy of this software and associated documentation files (the "Software"), 
diff --git a/README.md b/README.md
index b887dc5..f7c30c3 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,14 @@
 # Spatial Transcriptomics Analysis 
 
-Different tools for visualization, data processing and analysis (supervised and un-supervised learning, differential expression analysis, etc..) of Spatial Transcriptomics data (can also be used for single cell data).
+Different tools for visualization, data processing and analysis (supervised and un-supervised learning,
+differential expression analysis, etc..) of Spatial Transcriptomics datasets (can also be used for single cell data).
 
-The package is compatible with the output format of the data generated with the ST Pipeline (https://github.com/SpatialTranscriptomicsResearch/st_pipeline) and give full support to plot the data onto the tissue images but it is compatible with any single cell datasets where the data is stored as a matrix of counts (genes as columns and spot/cells as rows). 
+The package is compatible with the output format of the data generated with the
+ST Pipeline (https://github.com/SpatialTranscriptomicsResearch/st_pipeline) and give full
+support to plot the data onto the tissue images but it is compatible with any single cell datasets
+where the data is stored as a matrix of counts (genes as columns and spot/cells as rows).
 
-This package makes use of the following tools:
+This package makes use of the following R packages:
 
 t-SNE
 https://github.com/lvdmaaten/bhtsne
@@ -27,9 +31,10 @@ See AUTHORS file.
 ### Contact
 For bugs, feedback or help you can contact Jose Fernandez Navarro <jose.fernandez.navarro@scilifelab.se>
 
-### Note
+### Input Format
 The referred matrix format is the ST data format, a matrix of counts where spot coordinates are row names
-and the genes are column names.
+and the genes are column names. This matrix format (.TSV) is generated with the
+[ST Pipeline](https://github.com/SpatialTranscriptomicsResearch/st_pipeline)
 
 The scripts that allow you to pass the tissue HE image can optionally take a 3x3 alignment file.
 If the images are cropped to the exact array boundaries the alignment file is not needed
@@ -44,12 +49,51 @@ Where each a correspondonds to a cell of the affine transformation matrix.
 
 ### Installation
 
-Note that the ST Analysis package requires R (https://cran.r-project.org/) installed in your system.
-To install the ST Analsysis packate just clone or download the repository, cd into the cloned folder and type:
+We recommend that you install the latest version 3.4.x. Once you have installed R you can open
+a R terminal or Rstudio and type the following:
 
-    python setup.py install
+    source("https://bioconductor.org/biocLite.R")
+    biocLite("monocle")
+    biocLite("scran")
+    biocLite("DESeq2")
+    biocLite("Rtsne")
+    biocLite("edgeR")
     
-A bunch of scripts will then be available in your system.
+Before you install the ST Analysis package we recommend that you create a Python 3 virtual
+environment. We recommend [Anaconda](https://anaconda.org/anaconda/python).
+The latest versions of rpy2 (R binder for Python) are only compatible with Python 3.
+
+#### OSX
+The following instructions are for installing the ST Analysis package with Python 3.4 and Anaconda
+(should be the same for Python 3.6)
+Note: we advice to update Xcode to the latest version.
+
+    conda create -n python3.4 python=3.4
+    source activate python3.4
+    brew install freetype
+    brew install gcc
+    export CC=/usr/local/Cellar/gcc/7.2.0/bin/gcc-7
+    pip install rpy2
+    export CC=/usr/bin/clang
+    conda install matplotlib
+    conda install pandas
+    conda install scikit-learn
+    python setup.py install
+
+#### Linux
+The following instructions are for installing the ST Analysis package with Python 3.4 and Anaconda
+(should be the same for Python 3.6)
+Note: we advice to install and update the developer tools packages
+
+    conda create -n python3.4 python=3.4
+    source activate python3.4
+    pip install rpy2
+    conda install matplotlib
+    conda install pandas
+    conda install scikit-learn
+    python setup.py install
+
+A bunch of scripts (described behind) will then be available in your system.
 Note that you can always type script_name.py --help to get more information
 about how the script works. 
 The ST Analysis package is compatible with Python 2 and 3 and we recomend to use
@@ -60,50 +104,79 @@ a virtual environment to make the installation of the dependencies easier.
 ## Analysis tools
 
 ### To do un-supervised learning
-To see how spots cluster together based on their expression profiles you can run : 
+To see how spots cluster together based on their expression profiles you can run:
 
     unsupervised.py --counts-table-files matrix_counts.tsv --normalization DESeq2 --num-clusters 5 --clustering KMeans --dimensionality tSNE --image-files tissue_image.JPG --use-log-scale 
     
-  The script can be given one or serveral datasets (matrices with counts). It will perform dimesionality reduction
-  and then cluster the spots together based on the dimesionality reduced coordinates. 
-  It generates a scatter plot of the clusters. It also generates an image for
-  each dataset of the predicted classes on top of the tissue image (tissue image for each dataset must be given and optionally 
-  an alignment file to convert to pixel coordiantes).
-  It also generate a file with the predicted classes for each spot that can be used in other analysis.
-  
-  To know more about the parameters you can type --help 
+The script can be given one or serveral datasets (matrices with counts). It will perform dimesionality reduction
+and then cluster the spots together based on the dimesionality reduced coordinates.
+It generates a scatter plot of the clusters. It also generates an image for
+each dataset of the predicted classes on top of the tissue image (tissue image for each dataset must be given and optionally
+an alignment file to convert to pixel coordiantes).
+It also generate a file with the predicted classes for each spot that can be used in other analysis.
+To know more about the parameters you can type --help
 
 ### To do supervised learning
 You can train a classifier with the expression profiles of a set of spots
-where you know the class (cell type) and then predict on a new dataset
-of the same tissue. For that you can use the following script :
+where you know the class (spot type) and then predict on a new dataset
+of the same tissue. For that you can use the following script:
 
     supervised.py --train-data data_matrix.tsv --test-data data_matrix.tsv --train-casses train_classes.txt --test-classes test_classes.txt --image tissue_image.jpg
     
-  This will generate some statistics, a file with the predicted classes for each spot and a plot of the predicted spots on top of the tissue image (if the image and the alignment matrix are given). 
-  The script can take several datasets for the training set and it allows to normalize the training and testing data.
-  
-  To know more about the parameters you can type --help
+This will generate some statistics, a file with the predicted classes for each spot and a plot of
+the predicted spots on top of the tissue image (if the image and the alignment matrix are given).
+The script can take several datasets for the training set and it allows to normalize the training and testing data.
+The test/train classes file shoud look like:
+
+XxY 1
+XxY 1
+XxY 2
+
+Where X is the spot X coordinate and Y is the spot Y coordinate and 1,1 and 2 are
+spot classes (regions).
+To know more about the parameters you can type --help
 
 ### To visualize ST data (output from the ST Pipeline) 
-Use the script st_data_plotter.py. It can plot ST data, it can use
-filters (counts or genes) it can highlight spots with reg. expressions
+Use the script st_data_plotter.py to plot ST data, it can use
+filters (counts or genes) it can highlight spots with regular expressions
 of genes and it can highlight spots by giving a file with spot coordinates
-and labels. You need a matrix with the gene counts by spot and optionally
-the a tissue image and an alignment matrix. A example run would be : 
+and labels. You can also normalize the data for visualization.
+You need a matrix with the gene counts and spots and optionally
+a tissue image and an optional alignment matrix. A example run would be:
 
-    st_data_plotter.py --cutoff 2 --filter-genes Actb* --image tissue_image.jpg --alignment alignment_file.txt data_matrix.tsv
+    st_data_plotter.py --cutoff 2 --show-genes Actb* --image tissue_image.jpg data_matrix.tsv
     
-  This will generate a scatter plot of the expression of the spots that contain a gene Actb and with higher expression than 2 and it will use the tissue image as background. You could optionally pass a list of spots with their classes (Generated with unsupervised.py) to highlight spots in the scatter plot. More info if you type --help
+This will generate a scatter plot of the expression of the spots that contain a gene Actb and
+with higher expression than 2 and it will use the tissue image as background.
+You could optionally pass a list of spots with their classes (Generated with unsupervised.py)
+to highlight spots in the scatter plot. More info if you type --help
   
+### To slice a matrix of counts based of regions of interest
+You can slice a dataset based on regions of interests (spots) obtained
+manually or with unsupervised.py. You need a file defining classes for each spot
+(unsupervised.py generates such files):
+
+XxY 1
+XxY 1
+XxY 2
+
+Where X is the spot X coordinate and Y is the spot Y coordinate and 1,1 and 2 are
+spot classes (regions).
+A example run would be:
+
+    slice_regions_matrix.py --counts-matrix dataset.tsv --spot-classes classes.txt --regions 1 3
+
 ### To perform Differential Expression Analysis (DEA)
-You can perform a D.E.A using the output from unsupervised.py and a list of groups to where the D.E.A will be performed.
-The scripts generates different plots and the list of D.E genes in a text file. Basically the script
-needs one or more matrices of counts with ST data (genes as columns), a tab delimited file with two columns where
-the first column is a class and the second is a spot (for each input matrix) and finally the list of comparisions to be made
-from the classes present in the data (for example: 0:1-0:2 0:1-0:5). Where 0 refers to the first input dataseet and 1,2,5 refers to
-the classes defined the classes file.
-
-    differential_analysis.py --input-data stdata.tsv --data-classes spot_classes.txt --condition-tuples 1-2 1-3
+You can perform a D.E.A between ST datasets (most likely regions of interests)
+The scripts generates different plots and the list of D.E. genes in a text file for each comparison.
+Basically the script needs one or more matrices of counts with ST data (genes as columns) and a list
+of comparisons to make:
+
+DATASET0-DATASET2 DATASET1-DATASET3 ...
+
+Where 0 refers to the first input dataset. The scripts allows for different normalization methods and
+different D.E.A. algorithms (see --help). An example run would be:
+
+    differential_analysis.py --input-data stdata_region1.tsv stdata_region2.tsv --comparisons 0-1
     
-  To know more about the parameters you can type --help
+To know more about the parameters you can type --help
diff --git a/scripts/differential_analysis.py b/scripts/differential_analysis.py
index 0aa33e6..e829181 100644
--- a/scripts/differential_analysis.py
+++ b/scripts/differential_analysis.py
@@ -1,30 +1,27 @@
 #! /usr/bin/env python
 """ 
 This script performs Differential Expression Analysis 
-using DESeq2 on a table with gene counts with the following format:
+using DESeq2 or Scran + DESeq2 on ST datasets.
+
+The script can take one or several datasets with the following format:
 
       GeneA   GeneB   GeneC
 1x1   
 1x2
 ...
 
-The script can take one or several datasets.
-The script also requires a file where spots are mapped
-to a class for each dataset. 
-This file is a tab delimited file like this:
+Ideally, each dataset (matrix) would correspond to a region
+of interest (Selection) to be compared. 
 
-CLASS SPOT 
+The script also needs the list of comparisons to make (1 vs 2, etc..)
+Each comparison will be performed between datasets and the input should be:
 
-The script also requires a  list of dataset:region-dataset:region 
-tuples to perform differential expression analysis. 
-For example 0:1-1:2 or 0:1-0:3.
-Where 0:1 represents dataset number 0 (inputted)
-and region 1 (from the classess tab delimited file)
+DATASET-DATASET DATASET-DATASET ...
 
-The script will output the list of up-regulated and down-regulated
-for each DEA comparison as well as a set of plots.
+The script will output the list of up-regulated and down-regulated genes
+for each possible DEA comparison (between tables) as well as a set of volcano plots.
 
-The script allows to normalize the data with different methods.
+NOTE: soon Monocle and edgeR will be added 
 
 @Author Jose Fernandez Navarro <jose.fernandez.navarro@scilifelab.se>
 """
@@ -34,59 +31,18 @@
 import numpy as np
 import pandas as pd
 from stanalysis.normalization import RimportLibrary
-from stanalysis.preprocessing import compute_size_factors, aggregate_datatasets
-import rpy2.robjects as robjects
-from rpy2.robjects import pandas2ri, r, numpy2ri
-robjects.conversion.py2ri = numpy2ri
+from stanalysis.preprocessing import compute_size_factors, aggregate_datatasets, remove_noise
+from stanalysis.visualization import volcano
+from stanalysis.analysis import dea
 import matplotlib.pyplot as plt
-   
-def dea(counts, conds, size_factors=None):
-    """Makes a call to DESeq2 to
-    perform D.E.A. in the given
-    counts matrix with the given conditions
-    """
-    try:
-        pandas2ri.activate()
-        deseq2 = RimportLibrary("DESeq2")
-        r("suppressMessages(library(DESeq2))")
-        # Create the R conditions and counts data
-        r_counts = pandas2ri.py2ri(counts)
-        cond = robjects.DataFrame({"conditions": robjects.StrVector(conds)})
-        design = r('formula(~ conditions)')
-        dds = r.DESeqDataSetFromMatrix(countData=r_counts, colData=cond, design=design)
-        if size_factors is None:
-            dds = r.DESeq(dds)
-        else:
-            assign_sf = r["sizeFactors<-"]
-            dds = assign_sf(object=dds, value=robjects.FloatVector(size_factors))
-            dds = r.estimateDispersions(dds)
-            dds = r.nbinomWaldTest(dds)
-        results = r.results(dds, contrast=r.c("conditions", "A", "B"))
-        results = pandas2ri.ri2py_dataframe(r['as.data.frame'](results))
-        results.index = counts.index
-        # Return the DESeq2 DEA results object
-        pandas2ri.deactivate()
-    except Exception as e:
-        raise e
-    return results
-              
-def main(counts_table_files, data_classes, 
-         conditions_tuples, outdir, fdr, normalization, num_exp_spots,
-         num_exp_genes):
+    
+def main(counts_table_files, comparisons, outdir, fdr, 
+         normalization, num_exp_spots, num_exp_genes):
 
     if len(counts_table_files) == 0 or \
     any([not os.path.isfile(f) for f in counts_table_files]):
         sys.stderr.write("Error, input file/s not present or invalid format\n")
         sys.exit(1)
-        
-    if len(data_classes) == 0 or \
-    any([not os.path.isfile(f) for f in counts_table_files]):
-        sys.stderr.write("Error, input file/s not present or invalid format\n")
-        sys.exit(1)
-        
-    if len(data_classes) != len(counts_table_files):
-        sys.stderr.write("Error, input file/s not present or invalid format\n")
-        sys.exit(1)
      
     if not outdir or not os.path.isdir(outdir):
         outdir = os.getcwd()
@@ -96,119 +52,66 @@ def main(counts_table_files, data_classes,
     # Merge input datasets (Spots are rows and genes are columns)
     counts = aggregate_datatasets(counts_table_files)
     
-    # loads all the classes for the spots
-    spot_classes = dict()
-    for i,class_file in enumerate(data_classes):
-        with open(class_file) as filehandler:
-            for line in filehandler.readlines():
-                tokens = line.split()
-                assert(len(tokens) == 2)
-                spot_classes["{}_{}".format(i,tokens[0])] = str(tokens[1])  
-    
-    # Spots as columns
-    counts = counts.transpose()  
-    # Iterate the conditions and create a new data frame
-    # with the datasets/regions specified in each condition
-    # NOTE this could be done more elegantly using slicing
-    for cond in conditions_tuples:
-        new_counts = counts.copy()
-        tokens = cond.split("-")
-        assert(len(tokens) == 2)
-        tokens_a = tokens[0].split(":")
-        assert(len(tokens_a) == 2)
-        tokens_b = tokens[1].split(":")
-        assert(len(tokens_b) == 2)
-        dataset_a = str(tokens_a[0])
-        dataset_b = str(tokens_b[0])
-        region_a = str(tokens_a[1])
-        region_b = str(tokens_b[1])
-        conds = list()
-        for spot in new_counts.columns:
-            try:
-                spot_class = spot_classes[spot]
-                spot_dataset = spot.split("_")[0]
-                if spot_dataset == dataset_a and spot_class == region_a:
-                    conds.append("A")
-                elif spot_dataset == dataset_b and spot_class == region_b:
-                    conds.append("B")
-                else:
-                    new_counts.drop(spot, axis=1, inplace=True)
-            except KeyError:
-                new_counts.drop(spot, axis=1, inplace=True)
-        # Make the DEA call
-        print("Doing DEA for the conditions {} with {} spots and {} genes".format(cond,
-                                                                                  len(new_counts.columns), 
-                                                                                  len(new_counts.index)))
-        
-        # Spots as rows
-        new_counts = new_counts.transpose()
+    # Remove noisy spots and genes (Spots are rows and genes are columns)
+    counts = remove_noise(counts, num_exp_genes / 100.0, num_exp_spots / 100.0, min_expression=2)
         
-        # Remove noise (spots)
-        num_spots = len(new_counts.index)
-        keep_indexes = (new_counts != 0).sum(axis=1) >= num_exp_genes
-        new_counts = new_counts[keep_indexes]
-        conds = np.asarray(conds)[np.where(keep_indexes)].tolist()
-        print("Dropped {} spots (less than {} genes detected)".
-              format(num_spots - len(new_counts.index), num_exp_genes))
+    # Get the comparisons as tuples
+    comparisons = [c.split("-") for c in comparisons]
     
-        # Remove noise (genes)
-        # Spots as columns 
-        new_counts = new_counts.transpose()
-        num_genes = len(new_counts.index)
-        keep_indexes = (new_counts >= 1).sum(axis=1) >= num_exp_spots
-        new_counts = new_counts[keep_indexes]
-        print("Dropped {} genes (less than {} spots detected)".
-              format(num_genes - len(new_counts.index), num_exp_spots))
+    # Get the conditions (each dataset will be one condition)
+    conds = [spot.split("_")[0] for spot in counts.index]
         
-        # Compute size factors
-        size_factors = compute_size_factors(new_counts.transpose(), normalization, scran_clusters=False)
-        if all(size_factors) == 0.0 or np.isnan(size_factors).any() \
-        or np.isinf(size_factors).any(): 
-            size_factors = None
+    # Check that the comparisons are valid and if not remove the invalid ones
+    comparisons = [c for c in comparisons if c[0] in conds and c[1] in conds]
+    if len(comparisons) == 0:
+        sys.stderr.write("Error, the vector of comparisons is invalid\n")
+        sys.exit(1)
+                      
+    # Make the DEA call
+    print("Doing DEA for the comparisons {} with {} spots and {} genes".format(comparisons,
+                                                                              len(counts.index), 
+                                                                              len(counts.columns)))
 
-        # DEA call
-        try:
-            dea_results = dea(new_counts, conds, size_factors)
-        except Exception as e:
-            print("Error while performing DEA " + str(e))
-            continue
-        dea_results = dea_results[pd.notnull(dea_results)]
-        dea_results.sort_values(by=["padj"], ascending=True, inplace=True, axis=0)
-        print("Writing results to output...")
-        dea_results.ix[dea_results["padj"] <= fdr].to_csv(os.path.join(outdir, 
-                                                                       "dea_results_dataset{}_region{}_vs_dataset{}_region{}.tsv"
-                                                                       .format(dataset_a, region_a, dataset_b, region_b)), sep="\t")
+    # Compute size factors
+    size_factors = compute_size_factors(counts, normalization, scran_clusters=False)
+    if all(size_factors) == 0.0 or np.isnan(size_factors).any() \
+    or np.isinf(size_factors).any(): 
+        print("There was an error computing size factors, using the DESeq2 method instead!")
+        size_factors = None
+        
+    # Spots as columns 
+    counts = counts.transpose()
+    
+    # DEA call
+    try:
+        dea_results = dea(counts, conds, comparisons, size_factors)
+    except Exception as e:
+        sys.stderr.write("Error while performing DEA " + str(e) + "\n")
+        sys.exit(1)
+    
+    assert(len(comparisons) == len(dea_results))
+    for dea_result, comp in zip(dea_results, comparisons):
+        # Filter results
+        dea_result = dea_result.loc[pd.notnull(dea_result["padj"])]
+        #dea_result = dea_results[pd.notnull(dea_result)["padj"]]
+        dea_result = dea_result.sort_values(by=["padj"], ascending=True, axis=0)
+        print("Writing DE genes to output using a FDR cut-off of {}".format(fdr))
+        dea_result.to_csv(os.path.join(outdir,
+                                       "dea_results_dataset{}_vs_dataset{}.tsv"
+                                       .format(comp[0], comp[1])), sep="\t")
+        dea_result.ix[dea_result["padj"] <= fdr].to_csv(os.path.join(outdir,
+                                                                     "filtered_dea_results_dataset{}_vs_dataset{}.tsv"
+                                                                     .format(comp[0], comp[1])), sep="\t")
         # Volcano plot
-        print("Generating plots...")
-        # Add colors according to differently expressed or not (needs a p-value parameter)
-        colors = ["red" if p <= fdr else "blue" for p in dea_results["padj"]]
-        fig, a = plt.subplots(figsize=(30, 30))
-        x_points = dea_results["log2FoldChange"]
-        y_points = -np.log10(dea_results["pvalue"])
-        x_points_conf = dea_results.ix[dea_results["padj"] <= fdr]["log2FoldChange"]
-        y_points_conf = -np.log10(dea_results.ix[dea_results["padj"] <= fdr]["pvalue"])
-        names_conf = dea_results.ix[dea_results["padj"] <= fdr].index
-        # Scale axes
-        OFFSET = 0.1
-        a.set_xlim([min(x_points) - OFFSET, max(x_points) + OFFSET])
-        a.set_ylim([min(y_points) - OFFSET, max(y_points) + OFFSET])
-        a.set_xlabel("Log2FoldChange")
-        a.set_ylabel("-log10(pvalue)")
-        a.set_title("Volcano plot", size=10)
-        a.scatter(x_points, y_points, c=colors, edgecolor="none")  
-        for x,y,text in zip(x_points_conf,y_points_conf,names_conf):
-            a.text(x,y,text,size="x-small")
-        fig.savefig(os.path.join(outdir, "volcano_dataset{}_region{}_vs_dataset{}_region{}.pdf"
-                                 .format(dataset_a, region_a, dataset_b, region_b)), dpi=300)
-                
+        print("Writing volcano plot to output")
+        outfile = os.path.join(outdir, "volcano_dataset{}_vs_dataset{}.pdf".format(comp[0], comp[1]))
+        volcano(dea_result, fdr, outfile)  
+        
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description=__doc__,
                                      formatter_class=argparse.RawTextHelpFormatter)
     parser.add_argument("--counts-table-files", required=True, nargs='+', type=str,
                         help="One or more matrices with gene counts per feature/spot (genes as columns)")
-    parser.add_argument("--data-classes", required=True, nargs='+', type=str,
-                        help="One or more delimited file/s with the classes mapping to the spots " \
-                        "(Class first column and spot second column)")
     parser.add_argument("--normalization", default="DESeq2", metavar="[STR]", 
                         type=str, 
                         choices=["RAW", "DESeq2", "DESeq2Linear", "DESeq2PseudoCount", 
@@ -224,18 +127,20 @@ def main(counts_table_files, data_classes,
                         "Scran = Deconvolution Sum Factors (Marioni et al)\n" \
                         "REL = Each gene count divided by the total count of its spot\n" \
                         "(default: %(default)s)")
-    parser.add_argument("--conditions-tuples", required=True, nargs='+', type=str,
-                        help="One of more tuples that represent what classes and datasets will be compared for DEA.\n" \
-                        "The notation is simple, DATASET_NUMBER1:REGION_NUMBER1-DATASET_NUMBER2:REGION_NUMBER2.\n" \
-                        "For example 0:1-1:2 1:1-1:3 0:2-0:1. Note that dataset number starts by 0.")
-    parser.add_argument("--num-exp-genes", default=5, metavar="[INT]", type=int, choices=range(0, 100),
-                        help="The number of expressed genes (!= 0) a spot must have to be kept (default: %(default)s)")
-    parser.add_argument("--num-exp-spots", default=5, metavar="[INT]", type=int, choices=range(0, 100),
-                        help="The number of expressed spots (!= 0) a gene must have to be kept (default: %(default)s)")
+    parser.add_argument("--comparisons", required=True, nargs='+', type=str,
+                        help="One of more tuples that represent what comparisons to make in the DEA.\n" \
+                        "The notation is simple: DATASET_NUMBER-DATASET_NUMBER DATASET_NUMBER:DATASET_NUMBER ...\n" \
+                        "For example 0-1 2-3. Note that dataset number starts by 0 and that the dataset 0\ " \
+                        "will be the 1st in the input and so.")
+    parser.add_argument("--num-exp-genes", default=10, metavar="[INT]", type=int, choices=range(0, 100),
+                        help="The percentage of number of expressed genes (> 1) a spot\n" \
+                        "must have to be kept from the distribution of all expressed genes (default: %(default)s)")
+    parser.add_argument("--num-exp-spots", default=1, metavar="[INT]", type=int, choices=range(0, 100),
+                        help="The percentage of number of expressed spots a gene " \
+                        "must have to be kept from the total number of spots (default: %(default)s)")
     parser.add_argument("--fdr", type=float, default=0.01,
                         help="The FDR minimum confidence threshold (default: %(default)s)")
     parser.add_argument("--outdir", help="Path to output dir")
     args = parser.parse_args()
-    main(args.counts_table_files, args.data_classes, args.conditions_tuples, 
-         args.outdir, args.fdr, args.normalization, args.num_exp_spots,
-         args.num_exp_genes)
\ No newline at end of file
+    main(args.counts_table_files, args.comparisons, args.outdir,
+         args.fdr, args.normalization, args.num_exp_spots, args.num_exp_genes)
\ No newline at end of file
diff --git a/scripts/filter_genes_matrix.py b/scripts/filter_genes_matrix.py
index fd8e8cc..482f26a 100644
--- a/scripts/filter_genes_matrix.py
+++ b/scripts/filter_genes_matrix.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
 """ 
-Script that takes a matrix of counts
+Script that takes ST dataset (matrix of counts)
 where the columns are genes and the rows
 are spot coordinates
         gene    gene    
@@ -8,7 +8,9 @@
 XxY
 
 And removes the columns of genes
-matching the reg-exp given as input.
+matching the regular expression given as input.
+
+@Author Jose Fernandez Navarro <jose.fernandez.navarro@scilifelab.se>
 """
 
 import argparse
@@ -24,7 +26,7 @@ def main(counts_matrix, reg_exps, outfile):
         sys.exit(1)
      
     if not outfile:
-        outfile = "filtered_{}".format(os.path.basename(counts_matrix))
+        outfile = "filtered_{}".format(os.path.basename(counts_matrix).split(".")[0])
     
     # Read the data frame (genes as columns)
     counts_table = pd.read_table(counts_matrix, sep="\t", header=0, index_col=0)
diff --git a/scripts/st_data_plotter.py b/scripts/st_data_plotter.py
index b248bc2..b7df04f 100755
--- a/scripts/st_data_plotter.py
+++ b/scripts/st_data_plotter.py
@@ -1,9 +1,9 @@
 #! /usr/bin/env python
 """ 
-Script that creates a quality scatter plot from a ST-data file in matrix format
+Script that creates a scatter plot from a ST dataset in matrix format
 (genes as columns and spots as rows)
 
-The output will be a .png file with the same name as the input file if no name if given.
+The output will be an image file with the same name as the input file if no name if given.
 
 It allows to highlight spots with colors using a file with the following format : 
 
@@ -11,8 +11,7 @@
 
 Where INT represents a colour.
 
-When highlighting spots a new file will be created with the highlighted
-spots.
+When highlighting spots a new file will be created with the highlighted spots.
 
 It allows to choose transparency for the data points
 
diff --git a/scripts/supervised.py b/scripts/supervised.py
index f6d0182..5f786c6 100644
--- a/scripts/supervised.py
+++ b/scripts/supervised.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
 """ 
-This script performs a supervised prediction in ST RNA-Seq data
+This script performs a supervised prediction in ST datasets
 using a training set and a test set. 
 
 The training set will be one or more matrices of
@@ -38,35 +38,9 @@
 from sklearn.multiclass import OneVsRestClassifier
 from stanalysis.visualization import scatter_plot, color_map
 from stanalysis.alignment import parseAlignmentMatrix
+from stanalysis.analysis import weighted_color, composite_colors
 from cProfile import label
 from matplotlib.colors import LinearSegmentedColormap
-from matplotlib import colors as mpcolors
-
-def linear_conv(old, min, max, new_min, new_max):
-    return ((old - min) / (max - min)) * ((new_max - new_min) + new_min)
-
-def weighted_color(colors, probs, n_bins=100):
-    """Compute a weighted 0-1 value given
-    a list of colours, probabalities and number of bins"""
-    n_classes = float(len(colors)-1)
-    l = 1.0 / n_bins
-    h = 1-l
-    p = 0.0
-    for i,prob in enumerate(probs):
-        wi = linear_conv(float(i),0.0,n_classes,h,l)
-        p += abs(prob * wi)
-    return p
-   
-def composite_colors(colors, probs):
-    """Merge the set of colors
-    given using a set of probabilities"""
-    merged_color = [0.0,0.0,0.0,1.0]
-    for prob,color in zip(probs,colors):
-        new_color = mpcolors.colorConverter.to_rgba(color)
-        merged_color[0] = (new_color[0] - merged_color[0]) * prob + merged_color[0]
-        merged_color[1] = (new_color[1] - merged_color[1]) * prob + merged_color[1]
-        merged_color[2] = (new_color[2] - merged_color[2]) * prob + merged_color[2]
-    return merged_color
 
 def main(train_data, 
          test_data, 
diff --git a/scripts/unsupervised.py b/scripts/unsupervised.py
index bb8b302..f0d6e9d 100644
--- a/scripts/unsupervised.py
+++ b/scripts/unsupervised.py
@@ -1,9 +1,8 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 """
-A script that does unsupervised
-classification on single cell data (Mainly used for Spatial Transcriptomics)
-It takes a list of data frames as input and outputs :
+A script that does unsupervised Spatial Transcriptomics datasets (matrix of counts)
+It takes a list of datasets as input and outputs :
 
  - a scatter plot with the predicted classes (coulored) for each spot 
  - the spots plotted onto the images (if given) with the predicted class/color
@@ -17,7 +16,8 @@
 be (1 and 2). 
 
 The user can select what clustering algorithm to use
-and what dimensionality reduction technique to use. 
+and what dimensionality reduction technique to use and normalization
+method to use. 
 
 Noisy spots (very few genes expressed) are removed using a parameter.
 Noisy genes (expressed in very few spots) are removed using a parameter.
@@ -38,46 +38,8 @@
 from stanalysis.visualization import scatter_plot, scatter_plot3d, histogram
 from stanalysis.preprocessing import *
 from stanalysis.alignment import parseAlignmentMatrix
-from stanalysis.normalization import RimportLibrary
+from stanalysis.analysis import Rtsne, linear_conv, computeNClusters
 import matplotlib.pyplot as plt
-import rpy2.robjects.packages as rpackages
-from rpy2.robjects import pandas2ri, r, globalenv
-base = rpackages.importr("base")
-
-def linear_conv(old, min, max, new_min, new_max):
-    return ((old - min) / (max - min)) * ((new_max - new_min) + new_min)
-    
-def Rtsne(counts, dimensions):
-    """Performs dimensionality reduction
-    using the R package Rtsne"""
-    pandas2ri.activate()
-    r_counts = pandas2ri.py2ri(counts)
-    tsne = RimportLibrary("Rtsne")    
-    as_matrix = r["as.matrix"]
-    tsne_out = tsne.Rtsne(as_matrix(counts), 
-                          dims=dimensions, 
-                          theta=0.5, 
-                          check_duplicates=False, 
-                          pca=True, 
-                          initial_dims=50, 
-                          perplexity=30, 
-                          max_iter=1000, 
-                          verbose=False)
-    pandas_tsne_out = pandas2ri.ri2py(tsne_out.rx2('Y'))
-    pandas2ri.deactivate()
-    return pandas_tsne_out
-
-def computeNClusters(counts, min_size=20):
-    """Computes the number of clusters
-    from the data using Scran::quickCluster"""
-    pandas2ri.activate()
-    r_counts = pandas2ri.py2ri(counts.transpose())
-    scran = RimportLibrary("scran")    
-    as_matrix = r["as.matrix"]
-    clusters = scran.quickCluster(as_matrix(r_counts), min_size)
-    n_clust = len(set(clusters))
-    pandas2ri.deactivate()
-    return n_clust
   
 def main(counts_table_files, 
          normalization, 
@@ -94,7 +56,9 @@ def main(counts_table_files,
          spot_size,
          top_genes_criteria,
          outdir,
-         use_adjusted_log):
+         use_adjusted_log,
+         tsne_perplexity,
+         tsne_theta):
 
     if len(counts_table_files) == 0 or \
     any([not os.path.isfile(f) for f in counts_table_files]):
@@ -112,21 +76,25 @@ def main(counts_table_files,
         sys.stderr.write("Error, the number of alignments given as " \
         "input is not the same as the number of images\n")
         sys.exit(1)   
-       
+         
     if use_adjusted_log and use_log_scale:
         sys.stdout.write("Warning, both log and adjusted log are enabled " \
                          "only adjusted log will be used\n")
         use_log_scale = False
-                          
+      
+    if tsne_theta < 0.0 or tsne_theta > 1.0:
+        sys.stdout.write("Warning, invalid value for theta. Using default..\n")
+        tsne_theta = 0.5
+                                    
     if outdir is None or not os.path.isdir(outdir): 
         outdir = os.getcwd()
     outdir = os.path.abspath(outdir)
-    
+                
     # Merge input datasets (Spots are rows and genes are columns)
     counts = aggregate_datatasets(counts_table_files)
 
     # Remove noisy spots and genes (Spots are rows and genes are columns)
-    counts = remove_noise(counts, num_exp_genes / 100.0, num_exp_spots / 100.0)
+    counts = remove_noise(counts, num_exp_genes / 100.0, num_exp_spots / 100.0, min_expression=2)
     
     if num_clusters is None:
         num_clusters = computeNClusters(counts)
@@ -149,7 +117,7 @@ def main(counts_table_files,
            
     if "tSNE" in dimensionality:
         #NOTE the Scipy tsne seems buggy so we use the R one instead
-        reduced_data = Rtsne(norm_counts, num_dimensions)
+        reduced_data = Rtsne(norm_counts, num_dimensions, theta=tsne_theta, perplexity=tsne_perplexity)
     elif "PCA" in dimensionality:
         # n_components = None, number of mle to estimate optimal
         decomp_model = PCA(n_components=num_dimensions, whiten=True, copy=True)
@@ -187,7 +155,7 @@ def main(counts_table_files,
     
     # Write the spots and their classes to a file
     file_writers = [open(os.path.join(outdir,"computed_classes_{}.txt".format(i)),"w") 
-                    for i in xrange(len(counts_table_files))]
+                    for i in range(len(counts_table_files))]
     # Write the coordinates and the label/class that they belong to
     for i,bc in enumerate(norm_counts.index):
         tokens = bc.split("x")
@@ -240,7 +208,7 @@ def main(counts_table_files,
     # Plot the spots with colors corresponding to the predicted class
     # Use the HE image as background if the image is given
     tot_spots = norm_counts.index
-    for i in xrange(len(counts_table_files)):
+    for i in range(len(counts_table_files)):
         # Get the list of spot coordinates and colors to plot for each dataset
         x_points = list()
         y_points = list()
@@ -315,10 +283,10 @@ def main(counts_table_files,
                         help="The number of clusters/regions expected to be found.\n" \
                         "If not given the number of clusters will be computed.")
     parser.add_argument("--num-exp-genes", default=10, metavar="[INT]", type=int, choices=range(0, 100),
-                        help="The percentage of number of expressed genes (!= 0) a spot\n" \
+                        help="The percentage of number of expressed genes (> 1) a spot\n" \
                         "must have to be kept from the distribution of all expressed genes (default: %(default)s)")
     parser.add_argument("--num-exp-spots", default=1, metavar="[INT]", type=int, choices=range(0, 100),
-                        help="The percentage of number of expressed spots (!= 0) a gene " \
+                        help="The percentage of number of expressed spots a gene " \
                         "must have to be kept from the total number of spots (default: %(default)s)")
     parser.add_argument("--num-genes-keep", default=20, metavar="[INT]", type=int, choices=range(0, 100),
                         help="The percentage of genes to discard from the distribution of all the genes\n" \
@@ -335,12 +303,17 @@ def main(counts_table_files,
     parser.add_argument("--use-log-scale", action="store_true", default=False,
                         help="Use log2(counts + 1) values in the dimensionality reduction step")
     parser.add_argument("--alignment-files", default=None, nargs='+', type=str,
-                        help="One or more tab delimited files containing and alignment matrix for the images\n" \
-                        "(array coordinates to pixel coordinates) as a 3x3 matrix in one row.\n" \
+                        help="One or more tab delimited files containing and alignment matrix for the images as\n" \
+                        "\t a11 a12 a13 a21 a22 a23 a31 a32 a33\n" \
                         "Only useful is the image has extra borders, for instance not cropped to the array corners\n" \
                         "or if you want the keep the original image size in the plots.")
+    parser.add_argument("--coordinate-files", default=None, nargs='+', type=str,
+                        help="One or more tab delimited files containing spot coordinates as\n" \
+                        "\t spot_x  spot_y  new_spot_x  new_spot_y  pixel_x  pixel-y \n" \
+                        "If provided the spot coordinates of each dataset will be updated and only the spots"
+                        "present in the file will be kept.")
     parser.add_argument("--image-files", default=None, nargs='+', type=str,
-                        help="When given the data will plotted on top of the image\n" \
+                        help="When provided the data will plotted on top of the image\n" \
                         "It can be one ore more, ideally one for each input dataset\n " \
                         "It is desirable that the image is cropped to the array\n" \
                         "corners otherwise an alignment file is needed")
@@ -356,7 +329,12 @@ def main(counts_table_files,
     parser.add_argument("--use-adjusted-log", action="store_true", default=False,
                         help="Use adjusted log normalized counts (R Scater::normalized()) "
                         "in the dimensionality reduction step (recommended with SCRAN normalization)")
+    parser.add_argument("--tsne-perplexity", default=30, metavar="[INT]", type=int, choices=[10,500],
+                        help="The value of the perplexity for the t-sne method. (default: %(default)s)")
+    parser.add_argument("--tsne-theta", default=0.5, metavar="[FLOAT]", type=float,
+                        help="The value of theta for the t-sne method. (default: %(default)s)")
     parser.add_argument("--outdir", default=None, help="Path to output dir")
+    
     args = parser.parse_args()
     main(args.counts_table_files, 
          args.normalization, 
@@ -373,5 +351,7 @@ def main(counts_table_files,
          args.spot_size,
          args.top_genes_criteria,
          args.outdir,
-         args.use_adjusted_log)
+         args.use_adjusted_log,
+         args.tsne_perplexity,
+         args.tsne_theta)
 
diff --git a/setup.py b/setup.py
index 5ed7d81..a288461 100755
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 """
 A tool kit for analysis, visualization and classification 
-of single cell data (Mainly Spatial Transcriptomics data)
+of Spatial Transcriptomics datasets
 """
 
 import os
@@ -16,7 +16,7 @@
 
 setup(
   name = 'stanalysis',
-  version = "0.4.2",
+  version = "0.4.5",
   description = __doc__.split("\n", 1)[0],
   long_description = long_description,
   keywords = 'rna-seq analysis machine_learning spatial transcriptomics toolkit',
@@ -35,7 +35,9 @@
     'pandas',
     'sklearn',
     'matplotlib',
-    'rpy2<=2.8.6'
+    'rpy2',
+    'Pillow',
+    'jinja2'
   ],
   #test_suite = 'tests',
   scripts = glob.glob('scripts/*.py'),
@@ -45,7 +47,9 @@
     'Topic :: Software Development',
     'Topic :: Scientific/Engineering :: Bio-Informatics',
     'License :: MIT:: Copyright Jose Fernandez Navarro',
-    'Programming Language :: Python :: 2.7'
+    'Programming Language :: Python :: 3.4',
+    'Programming Language :: Python :: 3.5',
+    'Programming Language :: Python :: 3.6',
     'Programming Language :: Python :: 3.7',
     'Environment :: Console',
   ],
diff --git a/stanalysis/normalization.py b/stanalysis/normalization.py
index 531c9a6..de9d547 100644
--- a/stanalysis/normalization.py
+++ b/stanalysis/normalization.py
@@ -94,7 +94,7 @@ def logCountsWithFactors(counts, size_factors):
         function(counts, size_factors){
           sce = newSCESet(countData=counts)
           sce@phenoData$size_factor = size_factors
-          sce = normalize(sce, recompute_cpm=FALSE)
+          sce = normalize(sce)
           norm_counts = sce@assayData$norm_exprs
           return(as.data.frame(norm_counts))
         }
diff --git a/stanalysis/preprocessing.py b/stanalysis/preprocessing.py
index 1108489..38f1017 100644
--- a/stanalysis/preprocessing.py
+++ b/stanalysis/preprocessing.py
@@ -9,6 +9,44 @@
 import os
 from stanalysis.normalization import *
 
+def merge_datasets(counts_tableA, counts_tableB, merging_action="SUM"):
+    """ This function merges two ST datasts (matrix of counts)
+    assuming that they are consecutive sections and that they
+    are aligned so each spot on the same position on the tissue
+    and the number of spots in the same.
+    The type of merging can be SUM (sum both counts) or AVG (average 
+    sum of both counts).
+    It returns the merged matrix of counts for the commong spots/genes.
+    :param counts_tableA: a ST matrix of counts
+    :param counts_tableB: a ST matrix of counts
+    :param merging_action: Either SUM or AVG (for the merging of counts)
+    :return: a ST matrix of counts with the merged counts (for common genes/spots)
+    """
+    merged_table = counts_tableA.copy()       
+    for indexA, indexB in zip(counts_tableA.index, counts_tableB.index):
+        tokens = indexA.split("x")
+        assert(len(tokens) == 2)
+        x_a = float(tokens[0])
+        y_a = float(tokens[1])
+        tokens = indexB.split("x")
+        assert(len(tokens) == 2)
+        x_b = float(tokens[0])
+        y_b = float(tokens[1]) 
+        if abs(x_a - x_b) > 0.6 or abs(y_a - y_b) > 0.6:
+            print("Spots {} and {} dot not match and will be skipped".format(indexA, indexB))
+            merged_table.drop(indexA, axis=0, inplace=True)
+            continue        
+        for geneA, geneB in zip(counts_tableA.loc[indexA], counts_tableB.loc[indexB]):
+            if geneA != geneB:
+                print("Genes {} and {} dot not match and will be skipped".format(geneA, geneB))
+                merged_table.drop(geneA, axis=1, inplace=True)
+            elif merging_action == "SUM":
+                merged_table.loc[indexA,geneA] += counts_tableB.loc[indexB, geneB]      
+            else:
+                merged_table.loc[indexA,geneA] += counts_tableB.loc[indexB, geneB]
+                merged_table.loc[indexA,geneA] /= 2
+    return merged_table
+
 def aggregate_datatasets(counts_table_files, plot_hist=False):
     """ This functions takes a list of data frames with ST data
     (genes as columns and spots as rows) and merges them into
diff --git a/stanalysis/visualization.py b/stanalysis/visualization.py
index 1215b19..1ddca68 100644
--- a/stanalysis/visualization.py
+++ b/stanalysis/visualization.py
@@ -13,6 +13,32 @@
              "saddlebrown", "darkcyan", "gray", "darkred", "darkgreen", "darkblue", 
              "antiquewhite", "bisque", "black"]
 
+def volcano(dea_results, fdr, outfile):
+    """ Generates a volcano plot for the given DEA results
+    :param dea_results: a data frame that must contains a padj 
+    and a log2FoldChange columns
+    :param fdr: the fdr threshold to apply (0-1)
+    :param outfile: the name of the output file
+    """
+    fig, a = plt.subplots(figsize=(30, 30))
+    colors = ["red" if p <= fdr else "blue" for p in dea_results["padj"]]
+    x_points = dea_results["log2FoldChange"]
+    y_points = -np.log10(dea_results["pvalue"])
+    x_points_conf = dea_results.ix[dea_results["padj"] <= fdr]["log2FoldChange"]
+    y_points_conf = -np.log10(dea_results.ix[dea_results["padj"] <= fdr]["pvalue"])
+    names_conf = dea_results.ix[dea_results["padj"] <= fdr].index
+    # Scale axes
+    OFFSET = 0.1
+    a.set_xlim([min(x_points) - OFFSET, max(x_points) + OFFSET])
+    a.set_ylim([min(y_points) - OFFSET, max(y_points) + OFFSET])
+    a.set_xlabel("Log2FoldChange")
+    a.set_ylabel("-log10(pvalue)")
+    a.set_title("Volcano plot", size=10)
+    a.scatter(x_points, y_points, c=colors, edgecolor="none")  
+    for x,y,text in zip(x_points_conf,y_points_conf,names_conf):
+        a.text(x,y,text,size="x-small")
+    fig.savefig(outfile, dpi=300)
+    
 def histogram(x_points, output, title="Histogram", xlabel="X", color="blue"):
     """ This function generates a simple density histogram
     with the points given as input.