diff --git a/biojava-aa-prop/pom.xml b/biojava-aa-prop/pom.xml
index 89ef2154ef..1a449c7ef8 100644
--- a/biojava-aa-prop/pom.xml
+++ b/biojava-aa-prop/pom.xml
@@ -2,7 +2,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
4.0.0
biojava-aa-prop
@@ -70,12 +70,12 @@
org.biojava
biojava-core
- 4.2.6
+ 4.2.7
org.biojava
biojava-structure
- 4.2.6
+ 4.2.7
diff --git a/biojava-alignment/pom.xml b/biojava-alignment/pom.xml
index 41389cdd4e..7dfe24c010 100644
--- a/biojava-alignment/pom.xml
+++ b/biojava-alignment/pom.xml
@@ -4,7 +4,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
biojava-alignment
biojava-alignment
@@ -46,7 +46,7 @@
org.biojava
biojava-core
- 4.2.6
+ 4.2.7
compile
@@ -74,7 +74,7 @@
org.biojava
biojava-phylo
- 4.2.6
+ 4.2.7
diff --git a/biojava-core/pom.xml b/biojava-core/pom.xml
index e3b0e9a7a1..90e465d2dc 100644
--- a/biojava-core/pom.xml
+++ b/biojava-core/pom.xml
@@ -3,7 +3,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
4.0.0
biojava-core
diff --git a/biojava-genome/pom.xml b/biojava-genome/pom.xml
index b382489846..c82fade42d 100644
--- a/biojava-genome/pom.xml
+++ b/biojava-genome/pom.xml
@@ -3,7 +3,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
4.0.0
biojava-genome
@@ -85,13 +85,13 @@
org.biojava
biojava-core
- 4.2.6
+ 4.2.7
compile
org.biojava
biojava-alignment
- 4.2.6
+ 4.2.7
compile
diff --git a/biojava-genome/src/main/java/org/biojava/nbio/genome/util/ChromosomeMappingTools.java b/biojava-genome/src/main/java/org/biojava/nbio/genome/util/ChromosomeMappingTools.java
index 07ace5278b..ea9538f333 100644
--- a/biojava-genome/src/main/java/org/biojava/nbio/genome/util/ChromosomeMappingTools.java
+++ b/biojava-genome/src/main/java/org/biojava/nbio/genome/util/ChromosomeMappingTools.java
@@ -13,8 +13,11 @@
import java.util.List;
/**
- * A class that take care of the painful mapping
+ * A class that can map chromosomal positions to mRNA (coding sequence) positions.
+ *
+ * @author Andreas Prlic
*/
+
public class ChromosomeMappingTools {
private static final Logger logger = LoggerFactory.getLogger(ChromosomeMappingTools.class);
@@ -915,7 +918,7 @@ private static List> getCDSExonRangesForward(GeneChromosomePositi
//
/**
- * I have a genomic coordinate, where is it in the Gene?
+ * I have a genomic coordinate, where is it on the mRNA
*
* @param coordinate
* @param chromosomePosition
@@ -924,7 +927,7 @@ private static List> getCDSExonRangesForward(GeneChromosomePositi
public static int getCDSPosForChromosomeCoordinate(int coordinate, GeneChromosomePosition chromosomePosition) {
if ( chromosomePosition.getOrientation() == '+')
- return getCDSPosForward(coordinate,
+ return getCDSPosForward(coordinate,
chromosomePosition.getExonStarts(),
chromosomePosition.getExonEnds(),
chromosomePosition.getCdsStart(),
@@ -935,301 +938,139 @@ public static int getCDSPosForChromosomeCoordinate(int coordinate, GeneChromosom
chromosomePosition.getExonEnds(),
chromosomePosition.getCdsStart(),
chromosomePosition.getCdsEnd());
-
}
+ /** Converts the genetic coordinate to the position of the nucleotide on the mRNA sequence for a gene
+ * living on the forward DNA strand.
+ *
+ * @param chromPos The genetic coordinate on a chromosome
+ * @param exonStarts The list holding the genetic coordinates pointing to the start positions of the exons (including UTR regions)
+ * @param exonEnds The list holding the genetic coordinates pointing to the end positions of the exons (including UTR regions)
+ * @param cdsStart The start position of a coding region
+ * @param cdsEnd The end position of a coding region
+ *
+ * @return the position of the nucleotide base on the mRNA sequence corresponding to the input genetic coordinate (base 1)
+ *
+ * @author Yana Valasatava
+ */
+ public static int getCDSPosForward(int chromPos, List exonStarts, List exonEnds,
+ int cdsStart, int cdsEnd) {
- public static int getCDSPosReverse(int chromPos, List exonStarts, List exonEnds,
- int cdsStart, int cdsEnd) {
- boolean inCoding = false;
- int codingLength = 0;
-
- if (cdsEnd < cdsStart) {
- int tmp = cdsEnd;
- cdsEnd = cdsStart;
- cdsStart = tmp;
- }
-
- logger.debug("looking for CDS position for " +format(chromPos));
-
-
- if ( chromPos < cdsStart+1 ) {
- // this is not in a coding region!
-
- logger.debug(chromPos + " < " + cdsStart+1 );
+ // the genetic coordinate is not in a coding region
+ if ( (chromPos < (cdsStart+1) ) || ( chromPos > (cdsEnd+1) ) ) {
+ logger.debug("The "+format(chromPos)+" position is not in a coding region");
return -1;
}
- if ( chromPos > cdsEnd+1 ) {
- // this is not in a coding region!
+ logger.debug("looking for CDS position for " +format(chromPos));
- logger.debug(chromPos + " > " + cdsEnd+1 );
- return -1;
+ // remove exons that are fully landed in UTRs
+ List tmpS = new ArrayList(exonStarts);
+ List tmpE = new ArrayList(exonEnds);
+
+ int j=0;
+ for (int i = 0; i < tmpS.size(); i++) {
+ if ( ( tmpE.get(i) < cdsStart) || ( tmpS.get(i) > cdsEnd) ) {
+ exonStarts.remove(j);
+ exonEnds.remove(j);
+ }
+ else {
+ j++;
+ }
}
- int lengthExons = 0;
-
- // map reverse
- for (int i = exonStarts.size() - 1; i >= 0; i--) {
-
- logger.debug("Reverse Exon #" + (i+1) + "/" + exonStarts.size());
- int end = exonStarts.get(i);
- int start = exonEnds.get(i);
-
- if (end < start) {
- int tmp = end;
- end = start;
- start = tmp;
- }
- lengthExons += end - start;
-
-
- logger.debug(" is " + format(chromPos) + " part of Reverse exon? s:" + format(start+1) + " - e:" + format(end) + " | " + (end - start+1));
- logger.debug(" CDS start: " + format(cdsStart+1) + "-" + format(cdsEnd) + " coding length counter:" + codingLength);
-
-
-
- if (start+1 <= cdsEnd && end >= cdsEnd ) {
-
- // first exon with UTR
-
- inCoding = true;
-
- int tmpstart = start;
- if (start < cdsStart) {
- tmpstart = cdsStart;
- }
-
-
- logger.debug(" --- codingLength " + codingLength +
- " s:" +
- format(tmpstart+1) +
- " e:" +
- format(cdsEnd) +
- " p:" +
- format(chromPos) + " tmp: " + (chromPos - cdsStart));
-
- logger.debug("check: " + (codingLength + cdsEnd - tmpstart+1) + " ==?? " + format(chromPos));
-
- int tmp = cdsEnd - chromPos ;
- // if (codingLength + cdsEnd - tmpstart >= chromPos) {
- //if (end >= chromPos && start + (end-start) >= chromPos) {
- // if (codingLength + cdsEnd - tmpstart >= chromPos) {
- if ( chromPos >= start +1 && chromPos <= end){
-
-
- logger.debug(" -> found position in UTR exon: P: " + format(chromPos) + " s:" + format(tmpstart+1) + " l:" + format(tmp) + " cdsS:" + format(cdsStart+1) + " cdsE:" + format(cdsEnd) + " codingL:" + codingLength);
- return codingLength + tmp;
- }
-
-
- logger.debug(" codinglength " + codingLength + " + " + (cdsEnd - tmpstart ) );
-
- // do not add 1 here
- codingLength += (cdsEnd - tmpstart );
-
- boolean debug = logger.isDebugEnabled();
-
- if (debug) {
- StringBuffer b = new StringBuffer();
- b.append(" UTR :" + format(cdsEnd + 1) + " - " + format(end) + newline);
- if (tmpstart == start)
- b.append(" -> ");
- else
- b.append(" <-> ");
- b.append("Reverse Exon :" + format(tmpstart+1) + " - " + (cdsEnd) + " | " + format(cdsEnd - tmpstart) + " - " + codingLength + " | " + (codingLength % 3) + newline);
-
- logger.debug(b.toString());
-
- // single exon with UTR on both ends
- if (tmpstart != start)
- logger.debug(" UTR :" + format(cdsStart - 1) + " - " + format(start));
- }
- } else if (start <= cdsStart && end >= cdsStart) {
-
- // terminal exon with UTR
- inCoding = false;
-
-
- logger.debug(format(start + codingLength + end - cdsStart) + " ?? " + format(chromPos));
- // (start + codingLength + end - cdsStart >= chromPos &&
- if (( start+1 <= chromPos) && ( end >= chromPos)) {
-
- //int tmp = end - cdsStart ;
-// int tmp = chromPos - cdsStart ;
-// int l = end - cdsStart;
- int tmp = end-chromPos ;
- if ( tmp > end -cdsStart) {
- tmp = end-cdsStart ;
-
- logger.debug("Adjust tmp to " + tmp);
- }
-
-
-
- logger.debug( codingLength + " | " + (end -chromPos) + " | " + (end - cdsStart) );
- logger.debug(" <- Exon : " + format(cdsStart) + " - " + format(end) + " | " + format(end - cdsStart +1) + " | ");
- logger.debug(" UTR : " + format(start+1) + " - " + format(cdsStart ));
- logger.debug(" <- YYY found position noncoding exon: #" + (i+1) + " " + format(chromPos) + " s:" + format(start) + " tmp: " + format(tmp) + " cdsStart" + format(cdsStart) + " cdl:" + codingLength + " " + format(cdsEnd));
-
- return codingLength + tmp;
- }
-
-
- logger.debug(" codinglength " + codingLength + " + " + (end - cdsStart) );
- codingLength += (end - cdsStart+1);
-
- logger.debug(" <- Exon : " + format(cdsStart+1) + " - " + format(end) + " | " + format(end - cdsStart) + " | " + codingLength + " | " + (codingLength % 3));
- logger.debug(" UTR : " + format(start+1) + " - " + format(cdsStart ));
-
- } else if (inCoding) {
- // standard coding exon
- // if (codingLength + end - start >= chromPos) {
- if ( chromPos >= start+1 && chromPos <= end) {
-
- int tmp = end -chromPos ;
- if ( tmp > (end-start+1)) {
-
- tmp = (end - start+1);
-
- logger.debug("Adjusting tmp to " + tmp);
- }
-
-
- logger.debug(" -> found position in reverse coding exon: #" + (i+1) + " chromPos:" + format(chromPos) + " start:" + format(start+1) + " end:" + format(end) + " tmp:" + tmp + " cdsStart:" + cdsStart + " codingLength:" + codingLength);
-
- return codingLength+tmp;
- }
-
- // full exon is coding
-
- logger.debug(" codinglength " + codingLength + " + " + (end - start) );
- // don't add 1
- codingLength += (end - start);
-
- logger.debug(" Exon : " + format(start+1) + " - " + format(end) + " | " + format(end - start) + " | " + codingLength + " | " + (codingLength % 3));
- } else {
- // e.g. see UBQLN3
+ // remove untranslated regions from exons
+ int nExons = exonStarts.size();
+ exonStarts.remove(0);
+ exonStarts.add(0, cdsStart);
+ exonEnds.remove(nExons-1);
+ exonEnds.add(cdsEnd);
- logger.debug(" no translation! cdl:" + codingLength);
-
- }
+ int codingLength = 0;
+ int lengthExon = 0;
- //if ( inCoding )
- // logger.debug(" exon phase at end:" + ((codingLength) % 3));
+ // map the genetic coordinate on a stretch of a reverse strand
+ for (int i = 0; i < nExons; i++) {
- logger.debug(" coding length: " + codingLength + "(phase:" + (codingLength % 3) + ") CDS POS trying to map:" + chromPos);
+ int start = exonStarts.get(i);
+ int end = exonEnds.get(i);
+ lengthExon = end - start;
+ if (start+1 <= chromPos && end >= chromPos ) {
+ return codingLength + (chromPos-start);
+ }
+ else {
+ codingLength += lengthExon;
+ }
}
-
- logger.debug("length exons: " + lengthExons);
- // could not map, or map over the full length??
-
-
return -1;
-
}
- /**
- * Get the chromosome position mapped onto the mrna CDS transcript position (needs to be divided by 3 to get protein coordinate)
+ /** Converts the genetic coordinate to the position of the nucleotide on the mRNA sequence for a gene
+ * living on the reverse DNA strand.
+ *
+ * @param chromPos The genetic coordinate on a chromosome
+ * @param exonStarts The list holding the genetic coordinates pointing to the start positions of the exons (including UTR regions)
+ * @param exonEnds The list holding the genetic coordinates pointing to the end positions of the exons (including UTR regions)
+ * @param cdsStart The start position of a coding region
+ * @param cdsEnd The end position of a coding region
*
- * @param exonStarts
- * @param exonEnds
- * @param cdsStart
- * @param cdsEnd
- * @return
- */
- public static int getCDSPosForward(int chromPos, List exonStarts, List exonEnds,
- int cdsStart, int cdsEnd) {
- boolean inCoding = false;
- int codingLength = 0;
-
-
- logger.debug("looking for CDS position for " +chromPos);
-
- int lengthExons = 0;
- // map forward
- for (int i = 0; i < exonStarts.size(); i++) {
-
-
- // start can include UTR
- int start = exonStarts.get(i);
- int end = exonEnds.get(i);
-
- lengthExons += end - start;
-
-
- logger.debug("forward exon: " + (start+1) + " - " + end + " | " + (end - start) + " ? overlaps with " + format(chromPos));
-
- if (start +1 <= cdsStart +1 && end >= cdsStart+1) {
-
- if (end >= chromPos) {
- // we are reaching our target position
- // -1 is important here...
- int tmp = chromPos - cdsStart -1;
-
-
- logger.debug("cdl:" + codingLength + " | " + tmp);
- logger.debug(" -> found position in UTR exon: " + chromPos + " " + format(start) + " " + format(tmp) + " " + cdsStart + " " + codingLength);
-
- return codingLength + tmp;
- }
- inCoding = true;
- codingLength += (end - cdsStart);
-
- logger.debug(" UTR : " + format(start) + " - " + (cdsStart ));
- logger.debug(" -> Exon : " + format(cdsStart+1) + " - " + format(end) + " | " + format(end - cdsStart) + " | " + codingLength + " | " + (codingLength % 3));
-
- } else if (start <= cdsEnd && end >= cdsEnd) {
- //logger.debug(" <-- CDS end at: " + cdsEnd );
- inCoding = false;
- if (cdsEnd >= chromPos && (start +1 <= chromPos)) {
- int tmp = chromPos - start -1 ;
-
-
- logger.debug(" -> cdsForward found position in non coding exon#"+i+": " + chromPos + " " + format(start+1) + " " + format(tmp) + " " + cdsStart + " " + codingLength);
- return codingLength + tmp ;
- }
- codingLength += (cdsEnd - start);
-
- logger.debug(" <- Exon : " + format(start+1) + " - " + format(cdsEnd) + " | " + format(cdsEnd - start+1) + " | " + codingLength + " | " + (codingLength % 3));
- logger.debug(" UTR : " + format(cdsEnd + 1) + " - " + format(end));
-
-
- } else if (inCoding) {
-
- if (end >= chromPos && (start +1 <=chromPos)) {
-
- int tmp = chromPos-start-1 ;
-
-
- logger.debug(codingLength + " | " + tmp);
- logger.debug(" -> found position in coding exon #" + (i + 1) + ": " + format(chromPos) + " " + format(start + 1) + " " + format(tmp) + " " + cdsStart + " " + codingLength);
-
-
- return codingLength + tmp ;
- }
- // full exon is coding
- codingLength += (end - start);
+ * @return the position of the nucleotide base on the mRNA sequence corresponding to the input genetic coordinate (base 1)
+ *
+ * @author Yana Valasatava
+ */
+ public static int getCDSPosReverse(int chromPos, List exonStarts, List exonEnds,
+ int cdsStart, int cdsEnd) {
- logger.debug(" Exon :" + format(start) + " - " + format(end) + " | " + format(end - start) + " | " + codingLength + " | " + (codingLength % 3));
- }
- // if ( inCoding )
- // logger.debug("exon phase at end:" + (codingLength % 3));
- //
- // logger.debug(" coding length: " + codingLength);
+ // the genetic coordinate is not in a coding region
+ if ( (chromPos < (cdsStart+1)) || ( chromPos > (cdsEnd+1) ) ) {
+ logger.debug("The "+format(chromPos)+" position is not in a coding region");
+ return -1;
+ }
+ logger.debug("looking for CDS position for " +format(chromPos));
+ // remove exons that are fully landed in UTRs
+ List tmpS = new ArrayList(exonStarts);
+ List tmpE = new ArrayList(exonEnds);
+
+ int j=0;
+ for (int i = tmpS.size() - 1; i >= 0; i--) {
+ if ( ( tmpE.get(i) < cdsStart) || ( tmpS.get(i) > cdsEnd) ) {
+ exonStarts.remove(j);
+ exonEnds.remove(j);
+ }
+ else {
+ j++;
+ }
}
- //logger.debug("length exons: " + lengthExons);
- //return codingLength - 3;
-
- // could not map!
+ // remove untranslated regions from exons
+ int nExons = exonStarts.size();
+ exonStarts.remove(0);
+ exonStarts.add(0, cdsStart);
+ exonEnds.remove(nExons-1);
+ exonEnds.add(cdsEnd);
+ int codingLength = 0;
+ int lengthExon = 0;
+
+ // map the genetic coordinate on a stretch of a reverse strand
+ for (int i = nExons - 1; i >= 0; i--) {
+
+ int start = exonStarts.get(i);
+ int end = exonEnds.get(i);
+
+ lengthExon = end - start;
+ // +1 offset to be a base 1
+ if (start+1 <= chromPos && end >= chromPos ) {
+ return codingLength + (end-chromPos+1);
+ }
+ else {
+ codingLength += lengthExon;
+ }
+ }
return -1;
}
-
-
}
diff --git a/biojava-genome/src/test/java/org/biojava/nbio/genome/TestGenomeMapping.java b/biojava-genome/src/test/java/org/biojava/nbio/genome/TestGenomeMapping.java
index b4615b9db2..7f9ded778a 100644
--- a/biojava-genome/src/test/java/org/biojava/nbio/genome/TestGenomeMapping.java
+++ b/biojava-genome/src/test/java/org/biojava/nbio/genome/TestGenomeMapping.java
@@ -10,142 +10,306 @@
import java.io.InputStream;
import java.net.URL;
+import java.util.Arrays;
import java.util.List;
import java.util.zip.GZIPInputStream;
/**
* Created by andreas on 7/19/16.
*/
-public class TestGenomeMapping extends TestCase{
+public class TestGenomeMapping extends TestCase {
- private static final String geneChromosomeFile = "http://cdn.rcsb.org/gene/hg38/geneChromosome38.tsf.gz";
+ private static final String geneChromosomeFile = "http://cdn.rcsb.org/gene/hg38/geneChromosome38.tsf.gz";
- private List gcps = null;
+ private List gcps = null;
- @Override
- protected void setUp() throws Exception {
- super.setUp();
- InputStream input = new GZIPInputStream(new URL(geneChromosomeFile).openStream());
- gcps = GeneChromosomePositionParser.getChromosomeMappings(input);
+ @Override
+ protected void setUp() throws Exception {
+ super.setUp();
+ InputStream input = new GZIPInputStream(new URL(geneChromosomeFile).openStream());
+ gcps = GeneChromosomePositionParser.getChromosomeMappings(input);
+ }
+ @Test
+ public void testAK1() {
+ String geneName = "AK1";
- }
+ assertNotNull(gcps);
+ assertTrue("Problems with downloading refFlat file from UCSC browser ", gcps.size() > 100);
+ int uniProtLength = 194;
- @Test
- public void testAK1() {
- String geneName = "AK1";
+ try {
- assertNotNull(gcps);
- assertTrue("Problems with downloading refFlat file from UCSC browser ", gcps.size() > 100);
+ for (GeneChromosomePosition pos : gcps) {
- int uniProtLength = 194;
+ //System.out.println(pos.getGeneName());
+ if (!pos.getGeneName().equals(geneName))
+ continue;
- try {
+ /// there are three alternative transcripts for AK1.
+ // we are just testing one here:
- for (GeneChromosomePosition pos : gcps) {
+ if ( ! pos.getGenebankId().equals("NM_000476"))
+ continue;
- //System.out.println(pos.getGeneName());
- if (!pos.getGeneName().equals(geneName))
- continue;
+ assertTrue(pos.getGeneName().equals(geneName));
+ assertTrue(pos.getOrientation().equals('-'));
+ assertTrue(pos.getChromosome().equals("chr9"));
- /// there are three alternative transcripts for AK1.
- // we are just testing one here:
+ List> cdsranges = ChromosomeMappingTools.getCDSExonRanges(pos);
- if ( ! pos.getGenebankId().equals("NM_000476"))
- continue;
+ validateExon(0,0,7, cdsranges );
+ validateExon(1,7,43, cdsranges );
+ validateExon(2,43,207, cdsranges );
+ validateExon(3,207,324, cdsranges );
+ validateExon(4,324,516, cdsranges );
+ validateExon(5,516,585, cdsranges );
- assertTrue(pos.getGeneName().equals(geneName));
- assertTrue(pos.getOrientation().equals('-'));
- assertTrue(pos.getChromosome().equals("chr9"));
- List> cdsranges = ChromosomeMappingTools.getCDSExonRanges(pos);
+ int cdslength = ChromosomeMappingTools.getCDSLength(pos);
- validateExon(0,0,7, cdsranges );
- validateExon(1,7,43, cdsranges );
- validateExon(2,43,207, cdsranges );
- validateExon(3,207,324, cdsranges );
- validateExon(4,324,516, cdsranges );
- validateExon(5,516,585, cdsranges );
+ assertTrue("CDS length should be 582, but is " + cdslength, cdslength == (uniProtLength *3));
+ List> chromranges = ChromosomeMappingTools.getChromosomalRangesForCDS(pos);
- int cdslength = ChromosomeMappingTools.getCDSLength(pos);
+ // we are reverse strand. reverse the order
+ chromranges = Lists.reverse(chromranges);
- assertTrue("CDS length should be 582, but is " + cdslength, cdslength == (uniProtLength *3));
+ assertTrue(chromranges.size() == 6);
- List> chromranges = ChromosomeMappingTools.getChromosomalRangesForCDS(pos);
+ // compare with https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA=CCDS6881
+ validateExon(0,127868008,127868076, chromranges );
+ validateExon(1,127868320,127868512, chromranges );
+ validateExon(2,127871822,127871939, chromranges );
+ validateExon(3,127872689,127872853, chromranges );
+ validateExon(4,127873025,127873061, chromranges );
+ validateExon(5,127874610,127874617, chromranges );
- // we are reverse strand. reverse the order
- chromranges = Lists.reverse(chromranges);
+ }
+ } catch (Exception e) {
+ fail(e.getMessage());
+ }
+ }
- assertTrue(chromranges.size() == 6);
+ @Test
+ public void testHBA(){
- // compare with https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA=CCDS6881
- validateExon(0,127868008,127868076, chromranges );
- validateExon(1,127868320,127868512, chromranges );
- validateExon(2,127871822,127871939, chromranges );
- validateExon(3,127872689,127872853, chromranges );
- validateExon(4,127873025,127873061, chromranges );
- validateExon(5,127874610,127874617, chromranges );
-
- }
- } catch (Exception e) {
- fail(e.getMessage());
- }
- }
-
- @Test
- public void testHBA(){
-
- String geneName = "HBA1";
- assertNotNull(gcps);
-
- assertTrue("Problems with downloading refFlat file from UCSC browser ", gcps.size() > 100);
-
- try {
-
- for ( GeneChromosomePosition pos : gcps){
-
- //System.out.println(pos.getGeneName());
- if ( ! pos.getGeneName().equals(geneName))
- continue;
-
- assertTrue(pos.getGeneName().equals("HBA1"));
- assertTrue(pos.getGenebankId().equals("NM_000558"));
- assertTrue(pos.getChromosome().equals("chr16"));
- assertTrue(pos.getTranscriptionStart().equals(176650));
- assertTrue(pos.getTranscriptionEnd().equals(177522));
- assertTrue(pos.getOrientation().equals('+'));
-
- List> cdsranges = ChromosomeMappingTools.getCDSExonRanges(pos);
-
- assertTrue(cdsranges.size() == 3);
-
- validateExon(0,0,95,cdsranges);
- validateExon(1,95,300,cdsranges);
- validateExon(2,300,429,cdsranges);
-
-
- List> chromranges = ChromosomeMappingTools.getChromosomalRangesForCDS(pos);
-
- validateExon(0,176716,176811, chromranges );
- validateExon(1,176928,177133, chromranges );
- validateExon(2,177282,177411, chromranges );
-
-
- }
- } catch (Exception e){
- fail(e.getMessage());
- }
-
-
- }
-
- private void validateExon(int exonNr, int start, int stop, List> cdsranges) {
-
- Range exon = cdsranges.get(exonNr);
- assertTrue("Exon " + exonNr + " boundary "+ exon.lowerEndpoint() + " does not match " +start , exon.lowerEndpoint().equals(start));
- assertTrue("Exon " + exonNr + " boundary " + exon.upperEndpoint() + " does not match " + stop, exon.upperEndpoint().equals(stop));
-
- }
+ String geneName = "HBA1";
+ assertNotNull(gcps);
+
+ assertTrue("Problems with downloading refFlat file from UCSC browser ", gcps.size() > 100);
+
+ try {
+
+ for ( GeneChromosomePosition pos : gcps){
+
+ //System.out.println(pos.getGeneName());
+ if ( ! pos.getGeneName().equals(geneName))
+ continue;
+
+ assertTrue(pos.getGeneName().equals("HBA1"));
+ assertTrue(pos.getGenebankId().equals("NM_000558"));
+ assertTrue(pos.getChromosome().equals("chr16"));
+ assertTrue(pos.getTranscriptionStart().equals(176650));
+ assertTrue(pos.getTranscriptionEnd().equals(177522));
+ assertTrue(pos.getOrientation().equals('+'));
+
+ List> cdsranges = ChromosomeMappingTools.getCDSExonRanges(pos);
+
+ assertTrue(cdsranges.size() == 3);
+
+ validateExon(0,0,95,cdsranges);
+ validateExon(1,95,300,cdsranges);
+ validateExon(2,300,429,cdsranges);
+
+
+ List> chromranges = ChromosomeMappingTools.getChromosomalRangesForCDS(pos);
+
+ validateExon(0,176716,176811, chromranges );
+ validateExon(1,176928,177133, chromranges );
+ validateExon(2,177282,177411, chromranges );
+
+
+ }
+ } catch (Exception e){
+ fail(e.getMessage());
+ }
+
+
+ }
+
+ private void validateExon(int exonNr, int start, int stop, List> cdsranges) {
+
+ Range exon = cdsranges.get(exonNr);
+ assertTrue("Exon " + exonNr + " boundary "+ exon.lowerEndpoint() + " does not match " +start , exon.lowerEndpoint().equals(start));
+ assertTrue("Exon " + exonNr + " boundary " + exon.upperEndpoint() + " does not match " + stop, exon.upperEndpoint().equals(stop));
+
+ }
+
+ /** Get the position of the nucleotide base corresponding to the position of that base on the mRNA sequence
+ * for a gene living on the reverse DNA strand.
+ *
+ * @author Yana Valasatava
+ */
+ private int getPositionInmRNA(String geneName, String genebankId, int posChrom) {
+ for (GeneChromosomePosition gcp : gcps) {
+ if ( gcp.getGeneName().equals(geneName) ) {
+ if ( gcp.getGenebankId().equals(genebankId) ) {
+ return ChromosomeMappingTools.getCDSPosForChromosomeCoordinate(posChrom, gcp);
+ }
+ }
+ }
+ return -1;
+ }
+
+ /** Make sure the mapping tool correctly retrieves the mRNA position for a gene
+ * living on the forward DNA strand for different chromosome positions.
+ *
+ * @author Yana Valasatava
+ */
+ @Test
+ public void testForwardMappingPositions() {
+
+ String geneName = "HORMAD2"; // gene on the forward DNA strand
+ String genebankId = "NM_152510"; // GeneBank ID for the transcript used for testing (ENST00000336726)
+
+ List scenarios = Arrays.asList("first1exon", "last1exon", "last3exon");
+
+ int cds;
+ int posExonStart;
+ int posInmRNA;
+ for (String scenario : scenarios) {
+
+ switch (scenario) {
+
+ case "first1exon":
+ posExonStart = 30093953; // ending position of the last exon coding region (on forward strand)
+ posInmRNA = 1; // base 1 position in mRNA sequence
+ cds = getPositionInmRNA(geneName, genebankId, posExonStart);
+ assertEquals(cds, posInmRNA);
+ break;
+
+ case "last1exon":
+ posExonStart = 30094003; // starting position of the last exon coding region (on forward strand)
+ posInmRNA = 51; // position in mRNA sequence equals to the length of the exon
+ cds = getPositionInmRNA(geneName, genebankId, posExonStart);
+ assertEquals(cds, posInmRNA);
+ break;
+
+ case "last3exon":
+ posExonStart = 30103500; // starting position of the first base in a coding region (3rd exon)
+ posInmRNA = 257; // position in mRNA sequence equals to the sum length of the 3 last exons
+ cds = getPositionInmRNA(geneName, genebankId, posExonStart);
+ assertEquals(cds, posInmRNA);
+ break;
+ }
+ }
+ }
+
+ /** Make sure the mapping tool correctly retrieves the mRNA position for a gene
+ * living on the reverse DNA strand for different chromosome positions.
+ *
+ * @author Yana Valasatava
+ */
+ @Test
+ public void testReverseMappingPositions() {
+
+ String geneName = "BCL11B"; // gene on the reverse DNA strand
+ String genebankId = "NM_138576"; // GeneBank ID for the transcript used for testing (ENST00000357195)
+
+ List scenarios = Arrays.asList("first1exon", "last1exon", "last3exon");
+
+ int cds;
+ int posExonStart;
+ int posInmRNA;
+ for (String scenario : scenarios) {
+
+ switch (scenario) {
+
+ case "first1exon":
+ posExonStart = 99271218; // ending position of the last exon coding region (on forward strand)
+ posInmRNA = 1; // base 1 position in mRNA sequence
+ cds = getPositionInmRNA(geneName, genebankId, posExonStart);
+ assertEquals(cds, posInmRNA);
+ break;
+
+ case "last1exon":
+ posExonStart = 99271161; // starting position of the last exon coding region (on forward strand)
+ posInmRNA = 58; // position in mRNA sequence equals to the length of the exon
+ cds = getPositionInmRNA(geneName, genebankId, posExonStart);
+ assertEquals(cds, posInmRNA);
+ break;
+
+ case "last3exon":
+ posExonStart = 99231345; // starting position of the first base in a coding region (3rd exon)
+ posInmRNA = 640; // position in mRNA sequence equals to the sum length of the 3 last exons
+ cds = getPositionInmRNA(geneName, genebankId, posExonStart);
+ assertEquals(cds, posInmRNA);
+ break;
+ }
+ }
+ }
+
+ /** Test to make sure the mapping tool correctly identify that position falls outside the coding region
+ * for a gene living on the forward DNA strand.
+ *
+ * @author Yana Valasatava
+ */
+ @Test
+ public void testForwardMappingForExonBoundaries() {
+
+ String geneName = "HBA1"; // gene on the reverse DNA strand
+ String genebankId = "NM_000558"; // GeneBank ID for the transcript used for testing (ENST00000320868)
+
+ int posExonStart = 176717; // starting position of the first base in a coding region (1st exon)
+ int posExonEnd = 176811; // ending position of the first base in a coding region (1st exon)
+
+ int cdsSE = getPositionInmRNA(geneName, genebankId, posExonStart-1);
+ assertEquals(cdsSE, -1);
+
+ int cdsEE = getPositionInmRNA(geneName, genebankId, posExonEnd+1);
+ assertEquals(cdsEE, -1);
+ }
+
+ /** Test to make sure the mapping tool correctly identify that position falls outside the coding region
+ * for a gene living on the reverse DNA strand.
+ *
+ * @author Yana Valasatava
+ */
+ @Test
+ public void testReverseMappingForExonBoundaries() {
+
+ String geneName = "BCL11B"; // gene on the reverse DNA strand
+ String genebankId = "NM_138576"; // GeneBank ID for the transcript used for testing (ENST00000357195)
+
+ int posExonStart = 99174151; // starting position of the first base in a coding region (1st exon)
+ int posExonEnd = 99176195; // ending position of the first base in a coding region (1st exon)
+
+ int cdsSE = getPositionInmRNA(geneName, genebankId, posExonStart-1);
+ assertEquals(cdsSE, -1);
+
+ int cdsEE = getPositionInmRNA(geneName, genebankId, posExonEnd+1);
+ assertEquals(cdsEE, -1);
+ }
+
+ /** Test to make sure the mapping tool correctly converts the genetic position to a position on mRNA
+ * when multiple UTR regions are consecutive.
+ *
+ * @author Yana Valasatava
+ */
+ @Test
+ public void testMappingCromosomePosTomRNAMultiUTRs() {
+
+ String geneName = "ILK"; // gene on the reverse DNA strand
+ String genebankId = "NM_001278442"; // GeneBank ID for the transcript used for testing (ENST00000532063)
+
+ int chromPos = 6608760;
+ int mRNAPos = 16;
+
+ int cds = getPositionInmRNA(geneName, genebankId, chromPos);
+ assertEquals(cds, mRNAPos);
+
+ }
}
+
diff --git a/biojava-integrationtest/pom.xml b/biojava-integrationtest/pom.xml
index dca85357b3..c52783d2a1 100644
--- a/biojava-integrationtest/pom.xml
+++ b/biojava-integrationtest/pom.xml
@@ -4,7 +4,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
biojava-integrationtest
jar
@@ -32,7 +32,7 @@
org.biojava
biojava-structure
- 4.2.6
+ 4.2.7
diff --git a/biojava-modfinder/pom.xml b/biojava-modfinder/pom.xml
index e969b11f09..0ca1247f39 100644
--- a/biojava-modfinder/pom.xml
+++ b/biojava-modfinder/pom.xml
@@ -4,7 +4,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
biojava-modfinder
biojava-modfinder
@@ -31,7 +31,7 @@
org.biojava
biojava-structure
- 4.2.6
+ 4.2.7
jar
compile
diff --git a/biojava-ontology/pom.xml b/biojava-ontology/pom.xml
index 305039b548..03b1c00b76 100644
--- a/biojava-ontology/pom.xml
+++ b/biojava-ontology/pom.xml
@@ -4,7 +4,7 @@
org.biojava
biojava
- 4.2.6
+ 4.2.7
biojava-ontology
diff --git a/biojava-phylo/pom.xml b/biojava-phylo/pom.xml
index bc55ac6307..d374a993fd 100644
--- a/biojava-phylo/pom.xml
+++ b/biojava-phylo/pom.xml
@@ -3,7 +3,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
4.0.0
biojava-phylo
@@ -44,7 +44,7 @@
org.biojava
biojava-core
- 4.2.6
+ 4.2.7
compile
diff --git a/biojava-protein-disorder/pom.xml b/biojava-protein-disorder/pom.xml
index b95632bc2c..73c642d3e1 100644
--- a/biojava-protein-disorder/pom.xml
+++ b/biojava-protein-disorder/pom.xml
@@ -3,7 +3,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
biojava-protein-disorder
jar
@@ -63,7 +63,7 @@
org.biojava
biojava-core
- 4.2.6
+ 4.2.7
diff --git a/biojava-sequencing/pom.xml b/biojava-sequencing/pom.xml
index 0990a8e9b2..0b4d66e069 100644
--- a/biojava-sequencing/pom.xml
+++ b/biojava-sequencing/pom.xml
@@ -3,7 +3,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
4.0.0
biojava-sequencing
@@ -47,7 +47,7 @@
org.biojava
biojava-core
- 4.2.6
+ 4.2.7
compile
diff --git a/biojava-structure-gui/pom.xml b/biojava-structure-gui/pom.xml
index f96aad2a97..cfd684d103 100644
--- a/biojava-structure-gui/pom.xml
+++ b/biojava-structure-gui/pom.xml
@@ -3,7 +3,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
4.0.0
biojava-structure-gui
@@ -25,13 +25,13 @@
org.biojava
biojava-structure
- 4.2.6
+ 4.2.7
compile
org.biojava
biojava-core
- 4.2.6
+ 4.2.7
compile
diff --git a/biojava-structure/pom.xml b/biojava-structure/pom.xml
index ac6de4757a..db1c41a4f1 100644
--- a/biojava-structure/pom.xml
+++ b/biojava-structure/pom.xml
@@ -4,7 +4,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
biojava-structure
biojava-structure
@@ -22,13 +22,13 @@
org.biojava
biojava-alignment
- 4.2.6
+ 4.2.7
compile
org.biojava
biojava-core
- 4.2.6
+ 4.2.7
compile
diff --git a/biojava-survival/pom.xml b/biojava-survival/pom.xml
index 1f03bad9be..e6c0294b34 100644
--- a/biojava-survival/pom.xml
+++ b/biojava-survival/pom.xml
@@ -4,7 +4,7 @@
org.biojava
biojava
- 4.2.6
+ 4.2.7
biojava-survival
diff --git a/biojava-ws/pom.xml b/biojava-ws/pom.xml
index 646bd14a0b..0f2e83e414 100644
--- a/biojava-ws/pom.xml
+++ b/biojava-ws/pom.xml
@@ -3,7 +3,7 @@
biojava
org.biojava
- 4.2.6
+ 4.2.7
biojava-ws
biojava-ws
@@ -19,7 +19,7 @@
org.biojava
biojava-core
- 4.2.6
+ 4.2.7
compile
diff --git a/biojava-ws/src/main/java/org/biojava/nbio/ws/hmmer/RemoteHmmerScan.java b/biojava-ws/src/main/java/org/biojava/nbio/ws/hmmer/RemoteHmmerScan.java
index 4e83b61ec8..586aad785f 100644
--- a/biojava-ws/src/main/java/org/biojava/nbio/ws/hmmer/RemoteHmmerScan.java
+++ b/biojava-ws/src/main/java/org/biojava/nbio/ws/hmmer/RemoteHmmerScan.java
@@ -23,6 +23,8 @@
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.biojava.nbio.core.sequence.ProteinSequence;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.*;
import java.net.HttpURLConnection;
@@ -31,14 +33,17 @@
import java.util.TreeSet;
-/** Makes remote calls to the HMMER web service at the EBI web site and returns Pfam domain annotations for an input protein sequence.
+/**
+ * Makes remote calls to the HMMER web service at the EBI web site and returns Pfam domain annotations for an input protein sequence.
*
* @author Andreas Prlic
* @since 3.0.3
*/
public class RemoteHmmerScan implements HmmerScan {
- public static String HMMER_SERVICE = "http://www.ebi.ac.uk/Tools/hmmer/search/hmmscan";
+ private static final Logger LOGGER = LoggerFactory.getLogger(RemoteHmmerScan.class);
+
+ public static final String HMMER_SERVICE = "http://www.ebi.ac.uk/Tools/hmmer/search/hmmscan";
public RemoteHmmerScan(){
@@ -54,7 +59,8 @@ public SortedSet scan(ProteinSequence sequence) throws IOException
}
- /** Scans a protein sequence for Pfam profile matches.
+ /**
+ * Scans a protein sequence for Pfam profile matches.
*
* @param sequence
* @param serviceLocation
@@ -68,7 +74,7 @@ public SortedSet scan(ProteinSequence sequence, URL serviceLocation
postContent.append("hmmdb=pfam");
- // by default hmmscan runs with the HMMER3 cut_ga parameter enabled, the "gathering freshold", which depends on
+ // by default hmmscan runs with the HMMER3 cut_ga parameter enabled, the "gathering threshold", which depends on
// the cutoffs defined in the underlying HMM files.
// to request a different cutoff by e-value this could be enabled:
//postContent.append("&E=1");
@@ -86,7 +92,7 @@ public SortedSet scan(ProteinSequence sequence, URL serviceLocation
connection.setRequestMethod("POST");
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
- connection.setRequestProperty("Accept:","application/json");
+ connection.setRequestProperty("Accept","application/json");
connection.setRequestProperty("Content-Length", "" +
Integer.toString(postContent.toString().getBytes().length));
@@ -99,13 +105,12 @@ public SortedSet scan(ProteinSequence sequence, URL serviceLocation
wr.close ();
-// //Now get the redirect URL
+ //Now get the redirect URL
URL respUrl = new URL( connection.getHeaderField( "Location" ));
int responseCode = connection.getResponseCode();
if ( responseCode == 500){
- System.err.println("something went wrong!" + serviceLocation);
- System.err.println(connection.getResponseMessage());
+ LOGGER.warn("Got 500 response code for URL {}. Response message: {}.", serviceLocation, connection.getResponseMessage());
}
HttpURLConnection connection2 = (HttpURLConnection) respUrl.openConnection();
@@ -122,7 +127,6 @@ public SortedSet scan(ProteinSequence sequence, URL serviceLocation
StringBuffer result = new StringBuffer();
while ((inputLine = in.readLine()) != null) {
- //System.out.println(inputLine);
result.append(inputLine);
}
@@ -141,7 +145,6 @@ public SortedSet scan(ProteinSequence sequence, URL serviceLocation
for(int i =0 ; i < hits.size() ; i++){
JSONObject hit = hits.getJSONObject(i);
- //System.out.println("hit: "+ hit);
HmmerResult hmmResult = new HmmerResult();
@@ -170,13 +173,8 @@ public SortedSet scan(ProteinSequence sequence, URL serviceLocation
SortedSet domains = new TreeSet();
for ( int j= 0 ; j < hmmdomains.size() ; j++){
JSONObject d = hmmdomains.getJSONObject(j);
- //System.out.println(d);
Integer is_included = getInteger(d.get("is_included"));
if ( is_included == 0) {
-// System.out.println(" excluding: " + d.get("alihmmdesc") + " " + d.get("alihmmname") + " " +
-// hit.get("evalue") + " " +
-// d.get("alisqfrom") + " " +
-// d.get("alisqto"));
continue;
}
@@ -184,21 +182,12 @@ public SortedSet scan(ProteinSequence sequence, URL serviceLocation
// this filters out multiple hits to the same clan
Integer outcompeted = getInteger(d.get("outcompeted"));
if ( outcompeted != null && outcompeted == 1) {
-// System.out.println(" outcompeted: " + d.get("alihmmdesc") + " " + d.get("alihmmname")+ " " +
-// hit.get("evalue") + " " +
-// d.get("alisqfrom") + " " +
-// d.get("alisqto")
-// );
continue;
}
Integer significant = getInteger(d.get("significant"));
if ( significant != 1) {
-// System.out.println(" not significant: " + d.get("alihmmdesc") + " " + d.get("alihmmname")+ " " +
-// hit.get("evalue") + " " +
-// d.get("alisqfrom") + " " +
-// d.get("alisqto"));
continue;
}
@@ -224,8 +213,8 @@ public SortedSet scan(ProteinSequence sequence, URL serviceLocation
results.add(hmmResult);
}
- } catch (Exception e){
- e.printStackTrace();
+ } catch (NumberFormatException e){
+ LOGGER.warn("Could not parse number in Hmmer web service json response: {}", e.getMessage());
}
return results;
diff --git a/pom.xml b/pom.xml
index 3993929e35..6513c4cb61 100644
--- a/pom.xml
+++ b/pom.xml
@@ -12,7 +12,7 @@
org.biojava
biojava
pom
- 4.2.6
+ 4.2.7
biojava
BioJava is an open-source project dedicated to providing a Java framework for processing biological
data. It provides analytical and statistical routines, parsers for common file formats and allows the
@@ -44,7 +44,7 @@
scm:git:git@github.com:biojava/biojava.git
https://github.com/biojava/biojava
- biojava-4.2.6
+ biojava-4.2.7