diff --git a/biojava-aa-prop/pom.xml b/biojava-aa-prop/pom.xml index d4d3afb89b..d20dfbb6ce 100644 --- a/biojava-aa-prop/pom.xml +++ b/biojava-aa-prop/pom.xml @@ -2,7 +2,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 4.0.0 biojava-aa-prop @@ -70,12 +70,12 @@ org.biojava biojava-core - 4.2.3 + 4.2.4 org.biojava biojava-structure - 4.2.3 + 4.2.4 diff --git a/biojava-alignment/pom.xml b/biojava-alignment/pom.xml index c033b8d1f0..f6dae05256 100644 --- a/biojava-alignment/pom.xml +++ b/biojava-alignment/pom.xml @@ -4,7 +4,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 biojava-alignment biojava-alignment @@ -46,7 +46,7 @@ org.biojava biojava-core - 4.2.3 + 4.2.4 compile @@ -74,7 +74,7 @@ org.biojava biojava-phylo - 4.2.3 + 4.2.4 diff --git a/biojava-core/pom.xml b/biojava-core/pom.xml index 674c0257f3..38b1f1ba0c 100644 --- a/biojava-core/pom.xml +++ b/biojava-core/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 4.0.0 biojava-core diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/ProteinSequence.java b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/ProteinSequence.java index 714e55b09b..e3abbea6bd 100644 --- a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/ProteinSequence.java +++ b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/ProteinSequence.java @@ -153,7 +153,7 @@ public void setParentDNASequence(AbstractSequence parentDNAS } private DNASequence getRawParentSequence(String accessId) throws IOException { - String seqUrlTemplate = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=%s&rettype=fasta&retmode=text"; + String seqUrlTemplate = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=%s&rettype=fasta&retmode=text"; URL url = new URL(String.format(seqUrlTemplate, accessId)); logger.trace("Getting parent DNA sequence from URL: {}", url.toString()); diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/IUPACParser.java b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/IUPACParser.java index 46d058ec0f..8f1b449283 100644 --- a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/IUPACParser.java +++ b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/IUPACParser.java @@ -57,7 +57,7 @@ * * * Taken from NCBI with slight modification and put into the classpath resource. * * Takes in an ID, name, amino acid string and the locations of amino acids @@ -65,7 +65,7 @@ * position strings that correspond to the amino acid string or if you are using * the default IUPAC codes you can use the hardcoded ones which are consistent * amongst all codon + * href="https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c"> codon * tables. * * The generated {@link IUPACTable} objects do not parse the data further until diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java index 24b5ce1ada..045900263a 100644 --- a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java +++ b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java @@ -56,7 +56,7 @@ public class GenbankProxySequenceReader extends StringProxyS private final static Logger logger = LoggerFactory.getLogger(GenbankProxySequenceReader.class); - private static final String eutilBaseURL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; // + private static final String eutilBaseURL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; // private String genbankDirectoryCache = null; private GenbankSequenceParser, C> genbankParser; private GenericGenbankHeaderParser, C> headerParser; diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/search/io/blast/small-blastreport.blastxml b/biojava-core/src/test/resources/org/biojava/nbio/core/search/io/blast/small-blastreport.blastxml index 47e75cafd9..a1e441c505 100644 --- a/biojava-core/src/test/resources/org/biojava/nbio/core/search/io/blast/small-blastreport.blastxml +++ b/biojava-core/src/test/resources/org/biojava/nbio/core/search/io/blast/small-blastreport.blastxml @@ -1,5 +1,5 @@ - + blastn BLASTN 2.2.29+ diff --git a/biojava-genome/pom.xml b/biojava-genome/pom.xml index 24d410ee7c..bb8d08147c 100644 --- a/biojava-genome/pom.xml +++ b/biojava-genome/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 4.0.0 biojava-genome @@ -71,6 +71,12 @@ + + com.google.guava + guava + compile + 19.0 + junit junit @@ -79,13 +85,13 @@ org.biojava biojava-core - 4.2.3 + 4.2.4 compile org.biojava biojava-alignment - 4.2.3 + 4.2.4 compile diff --git a/biojava-genome/src/main/java/org/biojava/nbio/genome/parsers/genename/ChromPos.java b/biojava-genome/src/main/java/org/biojava/nbio/genome/parsers/genename/ChromPos.java new file mode 100644 index 0000000000..3d343d2f1a --- /dev/null +++ b/biojava-genome/src/main/java/org/biojava/nbio/genome/parsers/genename/ChromPos.java @@ -0,0 +1,31 @@ +package org.biojava.nbio.genome.parsers.genename; + +/** + * Created by ap3 on 27/10/2014. + */ +public class ChromPos { + + int pos; + int phase; + + public int getPhase() { + return phase; + } + + public void setPhase(int phase) { + this.phase = phase; + } + + public int getPos() { + return pos; + } + + public void setPos(int pos) { + this.pos = pos; + } + + public ChromPos(int pos, int phase){ + this.pos = pos; + this.phase = phase; + } +} \ No newline at end of file diff --git a/biojava-genome/src/main/java/org/biojava/nbio/genome/util/ChromosomeMappingTools.java b/biojava-genome/src/main/java/org/biojava/nbio/genome/util/ChromosomeMappingTools.java new file mode 100644 index 0000000000..07ace5278b --- /dev/null +++ b/biojava-genome/src/main/java/org/biojava/nbio/genome/util/ChromosomeMappingTools.java @@ -0,0 +1,1235 @@ +package org.biojava.nbio.genome.util; + +import com.google.common.collect.Range; + +import org.biojava.nbio.genome.parsers.genename.ChromPos; +import org.biojava.nbio.genome.parsers.genename.GeneChromosomePosition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.List; + +/** + * A class that take care of the painful mapping + */ +public class ChromosomeMappingTools { + + private static final Logger logger = LoggerFactory.getLogger(ChromosomeMappingTools.class); + + + private static final String newline = System.getProperty("line.separator"); + + + public static final String CHROMOSOME = "CHROMOSOME"; + public static final String CDS = "CDS"; + + + /** Pretty print the details of a GeneChromosomePosition to a String + * + * @param chromosomePosition + * @return + */ + public static String formatExonStructure(GeneChromosomePosition chromosomePosition ){ + if ( chromosomePosition.getOrientation() == '+') + return formatExonStructureForward(chromosomePosition); + + return formatExonStructureReverse(chromosomePosition); + + } + + + private static String formatExonStructureForward(GeneChromosomePosition chromPos) { + + StringWriter s = new StringWriter(); + + List exonStarts = chromPos.getExonStarts(); + List exonEnds = chromPos.getExonEnds(); + + int cdsStart = chromPos.getCdsStart(); + int cdsEnd = chromPos.getCdsEnd(); + + boolean inCoding = false; + int codingLength = 0; + + for (int i = 0; i < exonStarts.size(); i++) { + + int start = exonStarts.get(i); + int end = exonEnds.get(i); + + if (start <= cdsStart +1 && end >= cdsStart+1) { + + inCoding = true; + codingLength += (end - cdsStart); + s.append(" UTR : ").append(format(start)).append(" - ").append(format(cdsStart)); + s.append(newline); + s.append(" -> Exon : ").append(format(cdsStart + 1)).append(" - ").append(format(end)).append(" | ").append(Integer.toString(end - cdsStart)).append(" | ").append(Integer.toString(codingLength)).append(" | ").append(Integer.toString(codingLength % 3)); + s.append(newline); + + } else if (start <= cdsEnd && end >= cdsEnd) { + //logger.debug(" <-- CDS end at: " + cdsEnd ); + inCoding = false; + codingLength += (cdsEnd - start); + + s.append(" <- Exon : ").append(format(start + 1)).append(" - ").append(format(cdsEnd)).append(" | ").append(Integer.toString(cdsEnd - start)).append(" | ").append(Integer.toString(codingLength)).append(" | ").append(Integer.toString(codingLength % 3)); + s.append(newline); + s.append(" UTR : " + (cdsEnd +1) + " - " + format(end)); + s.append(newline); + + + } else if (inCoding) { + // full exon is coding + codingLength += (end - start); + + s.append(" Exon : ").append(format(start + 1)).append(" - ").append(format(end)).append(" | ").append(Integer.toString(end - start)).append(" | ").append(Integer.toString(codingLength)).append(" | ").append(Integer.toString(codingLength % 3)); + s.append(newline); + } + + } + s.append("Coding Length: "); + s.append((codingLength-3)+""); + s.append(newline); + return s.toString(); + } + + + + private static String showGenePosLink(GeneChromosomePosition chromPos, Integer pos ) { + + String spos = format(pos); + + StringBuffer buf = new StringBuffer(); + buf.append(""); + buf.append(spos); + buf.append(""); + + return buf.toString(); + } + + + private static String formatExonStructureReverse(GeneChromosomePosition chromPos) { + StringWriter s = new StringWriter(); + + List exonStarts = chromPos.getExonStarts(); + List exonEnds = chromPos.getExonEnds(); + + + int cdsStart = chromPos.getCdsStart(); + int cdsEnd = chromPos.getCdsEnd(); + + // logger.debug("CDS START:" +format(cdsStart) + " - " + format(cdsEnd)); + + boolean inCoding = false; + int codingLength = 0; + + if (cdsEnd < cdsStart) { + int tmp = cdsEnd; + cdsEnd = cdsStart; + cdsStart = tmp; + } + + // map reverse + for (int i = exonStarts.size() - 1; i >= 0; i--) { + + int end = exonStarts.get(i); + int start = exonEnds.get(i); + + if (end < start) { + int tmp = end; + end = start; + start = tmp; + } + + if (start <= cdsEnd && end >= cdsEnd) { + inCoding = true; + + int tmpstart = start; + if (start < cdsStart) { + tmpstart = cdsStart; + } + codingLength += (cdsEnd - tmpstart); + + s.append(" UTR :" + format(cdsEnd + 1) + " | " + format(end)); + s.append(newline); + if (tmpstart == start) + s.append(" -> "); + else + s.append(" <-> "); + s.append("Exon :").append(format(tmpstart + 1)).append(" - ").append(format(cdsEnd)).append(" | ").append(Integer.toString(cdsEnd - tmpstart)).append(" | ").append(Integer.toString(codingLength)).append(" | ").append(Integer.toString(codingLength % 3)); + s.append(newline); + // single exon with UTR on both ends + if (tmpstart != start) + s.append(" UTR :" + format(cdsStart ) + " - " + format(start + 1)); + s.append(newline); + + } else if (start <= cdsStart && end >= cdsStart) { + inCoding = false; + codingLength += (end - cdsStart); + + s.append(" <- Exon : " + format(cdsStart+1) + " - " + format(end) + " | " + (end - cdsStart) + " | " + codingLength + " | " + (codingLength % 3)); + s.append(newline); + s.append(" UTR : " + format(start+1) + " - " + format(cdsStart )); + s.append(newline); + + + } else if (inCoding) { + // full exon is coding + codingLength += (end - start); + + s.append(" Exon : " + format(start+1) + " - " + format(end) + " | " + (end - start) + " | " + codingLength + " | " + (codingLength % 3)); + s.append(newline); + } else { + // e.g. see UBQLN3 + s.append(" no translation! UTR: ").append(format(start)).append(" - ").append(format(end)); + s.append(newline); + } + } + + s.append("CDS length: ").append(Integer.toString(codingLength - 3)); + s.append(newline); + + return s.toString(); + } + + /** + * Get the length of the CDS in nucleotides. + * + * @param chromPos + * @return length of the CDS in nucleotides. + */ + public static int getCDSLength(GeneChromosomePosition chromPos) { + + logger.debug(chromPos.toString()); + + logger.debug("chromosomal information: "); + + logger.debug("Gene:" + chromPos.getGeneName()); + logger.debug(" Transcription (including UTRs): " + chromPos.getTranscriptionStart() + " - " + chromPos.getTranscriptionEnd() + " (length:" + (chromPos.getTranscriptionEnd() - chromPos.getTranscriptionStart()) + ")"); + logger.debug(" Orientation: " + chromPos.getOrientation()); + logger.debug(" CDS: " + (chromPos.getCdsStart()) + " - " + chromPos.getCdsEnd() + " (length: " + (chromPos.getCdsEnd() - chromPos.getCdsStart()) + ")"); + + + List exonStarts = chromPos.getExonStarts(); + List exonEnds = chromPos.getExonEnds(); + + logger.debug("Exons:" + exonStarts.size()); + + int cdsStart = chromPos.getCdsStart(); + int cdsEnd = chromPos.getCdsEnd(); + + + int codingLength; + if (chromPos.getOrientation().equals('+')) + codingLength = ChromosomeMappingTools.getCDSLengthForward(exonStarts, exonEnds, cdsStart, cdsEnd); + else + codingLength = ChromosomeMappingTools.getCDSLengthReverse(exonStarts, exonEnds, cdsStart, cdsEnd); + return codingLength; + } + + + /** + * maps the position of a CDS nucleotide back to the genome + * + * @param cdsNucleotidePosition + * @return a ChromPos object + */ + public static ChromPos getChromosomePosForCDScoordinate(int cdsNucleotidePosition, GeneChromosomePosition chromPos) { + + logger.debug(" ? Checking chromosome position for CDS position " + cdsNucleotidePosition); + + List exonStarts = chromPos.getExonStarts(); + List exonEnds = chromPos.getExonEnds(); + + logger.debug(" Exons:" + exonStarts.size()); + + int cdsStart = chromPos.getCdsStart(); + int cdsEnd = chromPos.getCdsEnd(); + + + ChromPos chromosomePos = null; + + if (chromPos.getOrientation().equals('+')) + + chromosomePos = ChromosomeMappingTools.getChromPosForward(cdsNucleotidePosition, exonStarts, exonEnds, cdsStart, cdsEnd); + else + chromosomePos = ChromosomeMappingTools.getChromPosReverse(cdsNucleotidePosition, exonStarts, exonEnds, cdsStart, cdsEnd); + + logger.debug("=> CDS pos " + cdsNucleotidePosition + " for " + chromPos.getGeneName() + " is on chromosome at " + chromosomePos); + return chromosomePos; + + } + + /** returns a nicely formatted representation of the position + * + * @param chromosomePosition + * @return + */ + private static String format(int chromosomePosition){ + + + return String.format("%,d", chromosomePosition); + } + + /** + * Get the CDS position mapped on the chromosome position + * + * @param exonStarts + * @param exonEnds + * @param cdsStart + * @param cdsEnd + * @return + */ + public static ChromPos getChromPosReverse(int cdsPos, List exonStarts, + List exonEnds, int cdsStart, int cdsEnd) { + + boolean inCoding = false; + int codingLength = 0; + + if (cdsEnd < cdsStart) { + int tmp = cdsEnd; + cdsEnd = cdsStart; + cdsStart = tmp; + } + + int lengthExons = 0; + + // map reverse + for (int i = exonStarts.size() - 1; i >= 0; i--) { + + logger.debug("Exon #" + (i+1) + "/" + exonStarts.size()); + int end = exonStarts.get(i); + int start = exonEnds.get(i); + + if (end < start) { + int tmp = end; + end = start; + start = tmp; + } + lengthExons += end - start; + + logger.debug(" is " + cdsPos + " part of Reverse exon? " + format(start+1) + " - " + format(end) + " | " + (end - start+1)); + logger.debug(" CDS start: " + format(cdsStart+1) + "-" + format(cdsEnd) + " coding length counter:" + codingLength); + + if (start+1 <= cdsEnd && end >= cdsEnd) { + + + // FIRST EXON + inCoding = true; + + + int tmpstart = start; + if (start < cdsStart) { + tmpstart = cdsStart; + } + + // here one of the few places where we don't say start+1 + int check = codingLength + cdsEnd - tmpstart ; + + logger.debug("First Exon | " + (check) + " | " + format(start+1) + " " + format(end) + " | " + (cdsEnd - tmpstart) + " | " + cdsPos ); + + + if ( ( check > cdsPos) ) + { + + int tmp = cdsPos - codingLength ; + + + logger.debug(" -> found position in UTR exon: " + format(cdsPos) + " " + format(tmpstart+1) + " tmp:" + format(tmp) + " cs:" + format(cdsStart+1) + " ce:" + format(cdsEnd) + " cl:" + codingLength); + + return new ChromPos((cdsEnd - tmp), -1) ; + } + + + // don't add 1 here + codingLength += (cdsEnd - tmpstart ); + + boolean debug = logger.isDebugEnabled(); + + + if ( debug ) { + + StringBuffer b = new StringBuffer(); + + b.append(" UTR :" + format(cdsEnd + 1) + " - " + format(end) + newline); + if (tmpstart == start) + b.append(" -> "); + else + b.append(" <-> "); + b.append("Exon :" + format(tmpstart + 1) + " - " + (cdsEnd) + " | " + format(cdsEnd - tmpstart + 1) + " - " + codingLength + " | " + (codingLength % 3) + newline); + + // single exon with UTR on both ends + if (tmpstart != start) + b.append(" UTR :" + format(cdsStart) + " - " + format(start + 1) + newline); + + logger.debug(b.toString()); + } + } else if (start <= cdsStart && end >= cdsStart) { + + // LAST EXON + inCoding = false; + + if (codingLength + end - cdsStart >= cdsPos) { + + // how many remaining coding nucleotides? + int tmp = codingLength + end - cdsStart - cdsPos ; + + logger.debug("cdl: " +codingLength + " tmp:" + tmp + " cdsStart: " + format(cdsStart)); + + logger.debug(" -> XXX found position noncoding exon: cdsPos:" + cdsPos + " s:" + format(start + 1) + " tmp:" + format(tmp) + " cdsStart:" + (cdsStart + 1) + " codingLength:" + codingLength + " cdsEnd:" + format(cdsEnd)); + + return new ChromPos((cdsStart + tmp),-1); + } + + codingLength += (end - cdsStart); + + logger.debug(" <- Exon : " + format(cdsStart+1) + " - " + format(end) + " | " + format(end - cdsStart+1) + " | " + codingLength + " | " + (codingLength % 3)); + logger.debug(" UTR : " + format(start+1) + " - " + format(cdsStart )); + + } else if (inCoding) { + + + if (codingLength + end - start -1 >= cdsPos) { + + int tmp = cdsPos - codingLength ; + + if ( tmp > (end - start ) ) { + + tmp = (end - start ); + + logger.debug("changing tmp to " + tmp); + + } + + logger.debug(" " + cdsPos + " " + codingLength + " | " + (cdsPos - codingLength) + " | " + (end -start) + " | " + tmp); + logger.debug(" Exon : " + format(start+1) + " - " + format(end) + " | " + format(end - start) + " | " + codingLength + " | " + (codingLength % 3)); + logger.debug(" -> RRR found position coding exon: " + cdsPos + " " + format(start+1) + " " + format(end) + " " + tmp + " " + format(cdsStart+1) + " " + codingLength); + + return new ChromPos((end - tmp),cdsPos %3); + } + + // full exon is coding + codingLength += (end - start) ; + + logger.debug(" Exon : " + format(start+1) + " - " + format(end) + " | " + format(end - start+1) + " | " + codingLength + " | " + (codingLength % 3)); + } else { + // e.g. see UBQLN3 + + logger.debug(" no translation!"); + } + + + logger.debug(" coding length: " + codingLength + "(phase:" + (codingLength % 3) + ") CDS POS trying to map:" + cdsPos); + + + } + + logger.debug("length exons: " + lengthExons); + // could not map, or map over the full length?? + return new ChromPos(-1,-1); + + + } + + /** + * Get the CDS position mapped onto the chromosome position + * + * @param exonStarts + * @param exonEnds + * @param cdsStart + * @param cdsEnd + * @return + */ + public static ChromPos getChromPosForward(int cdsPos, List exonStarts, List exonEnds, + int cdsStart, int cdsEnd) { + boolean inCoding = false; + int codingLength = 0; + + int lengthExons = 0; + // map forward + for (int i = 0; i < exonStarts.size(); i++) { + + + // start can include UTR + int start = exonStarts.get(i); + int end = exonEnds.get(i); + + lengthExons += end - start; + + + if (start <= cdsStart +1 && end >= cdsStart+1) { + // first exon with UTR + if (codingLength + (end - cdsStart-1) >= cdsPos) { + // we are reaching our target position + int tmp = cdsPos - codingLength; + + + logger.debug(cdsStart + " | " + codingLength + " | " + tmp); + logger.debug(" -> found position in UTR exon: #"+(i+1)+ " cdsPos:" + cdsPos + + " return:"+(cdsStart +1 + tmp) +" start:" + format(start + 1) + " " + format(tmp) + " " + cdsStart + " " + codingLength); + + // we start 1 after cdsStart... + return new ChromPos((cdsStart +1 + tmp),-1); + } + inCoding = true; + codingLength += (end - cdsStart); + + logger.debug(" UTR : " + format(start+1) + " - " + (cdsStart )); + logger.debug(" -> Exon : " + format(cdsStart+1) + " - " + format(end) + " | " + format(end - cdsStart) + " | " + codingLength + " | " + (codingLength % 3)); + + } else if (start+1 <= cdsEnd && end >= cdsEnd) { + // LAST EXON with UTR + //logger.debug(" <-- CDS end at: " + cdsEnd ); + inCoding = false; + if (codingLength + (cdsEnd - start-1) >= cdsPos) { + int tmp = cdsPos - codingLength; + + + logger.debug(" <- Exon : " + format(start+1) + " - " + format(cdsEnd) + " | " + format(cdsEnd - start) + " | " + codingLength + " | " + (codingLength % 3)); + logger.debug(" UTR : " + format(cdsEnd + 1) + " - " + format(end)); + logger.debug( codingLength + " | " + tmp + " | " + format(start+1)); + logger.debug(" -> chromPosForward found position in non coding exon: " + cdsPos + " " + format(start+1) + " " + format(tmp) + " " + format(cdsStart) + " " + codingLength); + + return new ChromPos((start +1 + tmp),cdsPos%3); + } + codingLength += (cdsEnd - start-1); + + logger.debug(" <- Exon : " + format(start+1) + " - " + format(cdsEnd) + " | " + format(cdsEnd - start) + " | " + codingLength + " | " + (codingLength % 3)); + logger.debug(" UTR : " + format(cdsEnd + 1) + " - " + format(end)); + + + } else if (inCoding) { + // A standard coding Exon + // tests for the maximum length of this coding exon + if (codingLength + (end - start -1) >= cdsPos) { + + // we are within the range of this exon + int tmp = cdsPos - codingLength ; + + + logger.debug(" Exon : " + format(start+1) + " - " + format(end) + " | " + format(end - start) + " | " + tmp + " | " + codingLength); + logger.debug(" -> found chr position in coding exon #" + (i+1) + ": cdsPos:" + format(cdsPos) + " s:" + format(start) + "-" + format(end) + " tmp:" + format(tmp) + " cdsStart:" + format(cdsStart) + " codingLength:" + codingLength); + + return new ChromPos((start +1 + tmp),cdsPos%3); + } + // full exon is coding + codingLength += (end - start ); + + logger.debug(" Exon : " + format(start+1) + " - " + format(end) + " | " + format(end - start) + " | " + codingLength + " | " + (codingLength % 3)); + } + // if ( inCoding ) + // logger.debug("exon phase at end:" + (codingLength % 3)); + // + // logger.debug(" coding length: " + codingLength); + + + } + + //logger.debug("length exons: " + lengthExons); + //return codingLength - 3; + + // could not map! + + return new ChromPos(-1,-1); + } + + + /** + * Get the length of the coding sequence + * + * @param exonStarts + * @param exonEnds + * @param cdsStart + * @param cdsEnd + * @return + */ + public static int getCDSLengthReverse(List exonStarts, + List exonEnds, int cdsStart, int cdsEnd) { + + boolean inCoding = false; + int codingLength = 0; + + if (cdsEnd < cdsStart) { + int tmp = cdsEnd; + cdsEnd = cdsStart; + cdsStart = tmp; + } + + int lengthExons = 0; + + // map reverse + for (int i = exonStarts.size() - 1; i >= 0; i--) { + + int end = exonStarts.get(i); + int start = exonEnds.get(i); + + if (end < start) { + int tmp = end; + end = start; + start = tmp; + } + lengthExons += end - start; + + if (start <= cdsEnd && end >= cdsEnd) { + inCoding = true; + + + int tmpstart = start; + if (start < cdsStart) { + tmpstart = cdsStart; + } + codingLength += (cdsEnd - tmpstart); + + boolean debug = logger.isDebugEnabled(); + + if ( debug) { + + StringBuffer b = new StringBuffer(); + + b.append(" UTR :" + (cdsEnd + 1) + " - " + (end) + newline); + if (tmpstart == start) + b.append(" -> "); + else + b.append(" <-> "); + b.append("Exon :" + tmpstart + " - " + cdsEnd + " | " + (cdsEnd - tmpstart) + " | " + codingLength + " | " + (codingLength % 3) + newline); + // single exon with UTR on both ends + if (tmpstart != start) + b.append(" UTR :" + (cdsStart - 1) + " - " + start + newline); + logger.debug(b.toString()); + + } + } else if (start <= cdsStart && end >= cdsStart) { + inCoding = false; + codingLength += (end - cdsStart); + + + logger.debug(" <- Exon : " + (cdsStart+1) + " - " + end + " | " + (end - cdsStart) + " | " + (codingLength-3) + " | " + (codingLength % 3)); + logger.debug(" UTR : " + start + " - " + (cdsStart )); + + + + } else if (inCoding) { + // full exon is coding + codingLength += (end - start); + + logger.debug(" Exon : " + start + " - " + end + " | " + (end - start) + " | " + codingLength + " | " + (codingLength % 3)); + } else { + // e.g. see UBQLN3 + + logger.debug(" no translation!"); + } + + } + + logger.debug("length exons: " + lengthExons + " codin length: " + (codingLength - 3)); + return codingLength - 3; + } + + /** + * Get the length of the coding sequence + * + * @param exonStarts + * @param exonEnds + * @param cdsStart + * @param cdsEnd + * @return + */ + public static int getCDSLengthForward(List exonStarts, List exonEnds, + int cdsStart, int cdsEnd) { + boolean inCoding = false; + int codingLength = 0; + + int lengthExons = 0; + // map forward + for (int i = 0; i < exonStarts.size(); i++) { + + int start = exonStarts.get(i); + int end = exonEnds.get(i); + lengthExons += end - start; + + + logger.debug("forward exon: " + (start+1) + " - " + end + " | " + (end - start)); + + if (start+1 <= cdsStart +1 && end >= cdsStart+1) { + + inCoding = true; + codingLength += (end - cdsStart); + + logger.debug(" UTR : " + start + " - " + (cdsStart )); + logger.debug(" -> Exon : " + (cdsStart+1) + " - " + end + " | " + (end - cdsStart+1) + " | " + codingLength + " | " + (codingLength % 3)); + + } else if (start+1 <= cdsEnd && end >= cdsEnd) { + + inCoding = false; + codingLength += (cdsEnd - start); + + logger.debug(" <- Exon : " + (start +1)+ " - " + cdsEnd + " | " + (cdsEnd - start+1) + " | " + codingLength + " | " + (codingLength % 3)); + logger.debug(" UTR : " + cdsEnd + 1 + " - " + end); + + + } else if (inCoding) { + // full exon is coding + codingLength += (end - start); + + logger.debug(" Exon :" + (start+1) + " - " + end + " | " + (end - start+1) + " | " + codingLength + " | " + (codingLength % 3)); + } + + + } + + logger.debug("length exons: " + Integer.toString(lengthExons)); + logger.debug("CDS length:" + Integer.toString((codingLength-3))); + + return codingLength-3 ; + } + + + + /** Extracts the exon boundaries in CDS coordinates. (needs to be divided by 3 to get AA positions) + * + * @param chromPos + * @return + */ + public static List> getCDSExonRanges(GeneChromosomePosition chromPos){ + if ( chromPos.getOrientation() == '+') + + return getCDSExonRangesForward(chromPos,CDS); + + return getCDSExonRangesReverse(chromPos,CDS); + } + + + /** Extracts the boundaries of the coding regions in chromosomal coordinates + * + * @param chromPos + * @return + */ + public static List> getChromosomalRangesForCDS(GeneChromosomePosition chromPos){ + if ( chromPos.getOrientation() == '+') + return getCDSExonRangesForward(chromPos,CHROMOSOME); + + return getCDSExonRangesReverse(chromPos,CHROMOSOME); + } + + private static List> getCDSExonRangesReverse(GeneChromosomePosition chromPos, + String responseType) { + + List exonStarts = chromPos.getExonStarts(); + List exonEnds = chromPos.getExonEnds(); + + List> data = new ArrayList<>(); + int cdsStart = chromPos.getCdsStart(); + int cdsEnd = chromPos.getCdsEnd(); + + boolean inCoding = false; + int codingLength = 0; + + if (cdsEnd < cdsStart) { + int tmp = cdsEnd; + cdsEnd = cdsStart; + cdsStart = tmp; + } + + java.lang.StringBuffer s =null; + + boolean debug = logger.isDebugEnabled(); + + + if ( debug) + s = new StringBuffer(); + + int lengthExons = 0; + + // map reverse + for (int i = exonStarts.size() - 1; i >= 0; i--) { + + int end = exonStarts.get(i); + int start = exonEnds.get(i); + + if (end < start) { + int tmp = end; + end = start; + start = tmp; + } + lengthExons += end - start; + //s.append("Reverse exon: " + end + " - " + start + " | " + (end - start)); + //s.append(newline); + + if (start <= cdsEnd && end >= cdsEnd) { + inCoding = true; + + + int tmpstart = start; + if (start < cdsStart) { + tmpstart = cdsStart; + } + codingLength += (cdsEnd - tmpstart); + if ( debug ) { + s.append(" UTR :").append(format(cdsEnd + 1)).append(" | ").append(format(end)); + s.append(newline); + if (tmpstart == start) + s.append(" -> "); + else + s.append(" <-> "); + s.append("Exon :").append(format(tmpstart + 1)).append(" - ").append(format(cdsEnd)).append(" | ").append(cdsEnd - tmpstart).append(" | ").append(codingLength).append(" | ").append(codingLength % 3); + s.append(newline); + // single exon with UTR on both ends + if (tmpstart != start) + s.append(" UTR :").append(format(cdsStart)).append(" - ").append(format(start + 1)); + s.append(newline); + } + + + Range r ; + if ( responseType.equals(CDS)) + r = Range.closed(0,codingLength); + else + r = Range.closed(tmpstart,cdsEnd); + + data.add(r); + + } else if (start <= cdsStart && end >= cdsStart) { + inCoding = false; + + Range r; + if ( responseType.equals(CDS)) + r = Range.closed(codingLength,codingLength+(end-cdsStart)); + else + r = Range.closed(cdsStart+1,end); + + data.add(r); + + codingLength += (end - cdsStart); + if (debug) { + s.append(" <- Exon : " + format(cdsStart + 1) + " - " + format(end) + " | " + (end - cdsStart) + " | " + codingLength + " | " + (codingLength % 3)); + s.append(newline); + s.append(" UTR : ").append(format(start + 1)).append(" - ").append(format(cdsStart)); + s.append(newline); + } + + + } else if (inCoding) { + // full exon is coding + + Range r; + if ( responseType.equals(CDS)) + r = Range.closed(codingLength,codingLength+(end-start)); + else + r = Range.closed(start,end); + data.add(r); + + codingLength += (end - start); + if (debug) { + s.append(" Exon : " + format(start + 1) + " - " + format(end) + " | " + (end - start) + " | " + codingLength + " | " + (codingLength % 3)); + s.append(newline); + } + } else { + // e.g. see UBQLN3 + if ( debug ) { + s.append(" no translation! UTR: " + format(start) + " - " + format(end)); + s.append(newline); + } + } + } + if ( debug ) { + s.append("CDS length: ").append(Integer.toString(codingLength - 3)); + s.append(newline); + logger.debug(s.toString()); + } + + return data; + } + + + + private static List> getCDSExonRangesForward(GeneChromosomePosition chromPos, + String responseType) { + + List> data = new ArrayList<>(); + List exonStarts = chromPos.getExonStarts(); + List exonEnds = chromPos.getExonEnds(); + + + int cdsStart = chromPos.getCdsStart(); + int cdsEnd = chromPos.getCdsEnd(); + + boolean inCoding = false; + int codingLength = 0; + + for (int i = 0; i < exonStarts.size(); i++) { + + int start = exonStarts.get(i); + int end = exonEnds.get(i); + + if (start <= cdsStart && end >= cdsStart) { + + inCoding = true; + codingLength += (end - cdsStart); +// + + + Range r; + if ( responseType.equals(CDS)) + r = Range.closed(0,codingLength); + else + r = Range.closed(cdsStart,end); + data.add(r); + + } else if (start <= cdsEnd && end >= cdsEnd) { + //logger.debug(" <-- CDS end at: " + cdsEnd ); + inCoding = false; + + + Range r; + if ( responseType.equals(CDS)) + r = Range.closed(codingLength,codingLength+(cdsEnd-start)); + else + r = Range.closed(start,cdsEnd); + data.add(r); + codingLength += (cdsEnd - start); + + } else if (inCoding) { + // full exon is coding + + Range r; + if ( responseType.equals(CDS)) + r = Range.closed(codingLength,codingLength+(end-start)); + else + r = Range.closed(start,end); + data.add(r); + codingLength += (end - start); + + } + + } + + return data; + } +// + + /** + * I have a genomic coordinate, where is it in the Gene? + * + * @param coordinate + * @param chromosomePosition + * @return + */ + public static int getCDSPosForChromosomeCoordinate(int coordinate, GeneChromosomePosition chromosomePosition) { + + if ( chromosomePosition.getOrientation() == '+') + return getCDSPosForward(coordinate, + chromosomePosition.getExonStarts(), + chromosomePosition.getExonEnds(), + chromosomePosition.getCdsStart(), + chromosomePosition.getCdsEnd()); + + return getCDSPosReverse(coordinate, + chromosomePosition.getExonStarts(), + chromosomePosition.getExonEnds(), + chromosomePosition.getCdsStart(), + chromosomePosition.getCdsEnd()); + + } + + + public static int getCDSPosReverse(int chromPos, List exonStarts, List exonEnds, + int cdsStart, int cdsEnd) { + boolean inCoding = false; + int codingLength = 0; + + if (cdsEnd < cdsStart) { + int tmp = cdsEnd; + cdsEnd = cdsStart; + cdsStart = tmp; + } + + logger.debug("looking for CDS position for " +format(chromPos)); + + + if ( chromPos < cdsStart+1 ) { + // this is not in a coding region! + + logger.debug(chromPos + " < " + cdsStart+1 ); + return -1; + } + + if ( chromPos > cdsEnd+1 ) { + // this is not in a coding region! + + logger.debug(chromPos + " > " + cdsEnd+1 ); + return -1; + } + + int lengthExons = 0; + + // map reverse + for (int i = exonStarts.size() - 1; i >= 0; i--) { + + logger.debug("Reverse Exon #" + (i+1) + "/" + exonStarts.size()); + int end = exonStarts.get(i); + int start = exonEnds.get(i); + + if (end < start) { + int tmp = end; + end = start; + start = tmp; + } + lengthExons += end - start; + + + logger.debug(" is " + format(chromPos) + " part of Reverse exon? s:" + format(start+1) + " - e:" + format(end) + " | " + (end - start+1)); + logger.debug(" CDS start: " + format(cdsStart+1) + "-" + format(cdsEnd) + " coding length counter:" + codingLength); + + + + if (start+1 <= cdsEnd && end >= cdsEnd ) { + + // first exon with UTR + + inCoding = true; + + int tmpstart = start; + if (start < cdsStart) { + tmpstart = cdsStart; + } + + + logger.debug(" --- codingLength " + codingLength + + " s:" + + format(tmpstart+1) + + " e:" + + format(cdsEnd) + + " p:" + + format(chromPos) + " tmp: " + (chromPos - cdsStart)); + + logger.debug("check: " + (codingLength + cdsEnd - tmpstart+1) + " ==?? " + format(chromPos)); + + int tmp = cdsEnd - chromPos ; + // if (codingLength + cdsEnd - tmpstart >= chromPos) { + //if (end >= chromPos && start + (end-start) >= chromPos) { + // if (codingLength + cdsEnd - tmpstart >= chromPos) { + if ( chromPos >= start +1 && chromPos <= end){ + + + logger.debug(" -> found position in UTR exon: P: " + format(chromPos) + " s:" + format(tmpstart+1) + " l:" + format(tmp) + " cdsS:" + format(cdsStart+1) + " cdsE:" + format(cdsEnd) + " codingL:" + codingLength); + return codingLength + tmp; + } + + + logger.debug(" codinglength " + codingLength + " + " + (cdsEnd - tmpstart ) ); + + // do not add 1 here + codingLength += (cdsEnd - tmpstart ); + + boolean debug = logger.isDebugEnabled(); + + if (debug) { + StringBuffer b = new StringBuffer(); + b.append(" UTR :" + format(cdsEnd + 1) + " - " + format(end) + newline); + if (tmpstart == start) + b.append(" -> "); + else + b.append(" <-> "); + b.append("Reverse Exon :" + format(tmpstart+1) + " - " + (cdsEnd) + " | " + format(cdsEnd - tmpstart) + " - " + codingLength + " | " + (codingLength % 3) + newline); + + logger.debug(b.toString()); + + // single exon with UTR on both ends + if (tmpstart != start) + logger.debug(" UTR :" + format(cdsStart - 1) + " - " + format(start)); + } + } else if (start <= cdsStart && end >= cdsStart) { + + // terminal exon with UTR + inCoding = false; + + + logger.debug(format(start + codingLength + end - cdsStart) + " ?? " + format(chromPos)); + // (start + codingLength + end - cdsStart >= chromPos && + if (( start+1 <= chromPos) && ( end >= chromPos)) { + + //int tmp = end - cdsStart ; +// int tmp = chromPos - cdsStart ; +// int l = end - cdsStart; + int tmp = end-chromPos ; + if ( tmp > end -cdsStart) { + tmp = end-cdsStart ; + + logger.debug("Adjust tmp to " + tmp); + } + + + + logger.debug( codingLength + " | " + (end -chromPos) + " | " + (end - cdsStart) ); + logger.debug(" <- Exon : " + format(cdsStart) + " - " + format(end) + " | " + format(end - cdsStart +1) + " | "); + logger.debug(" UTR : " + format(start+1) + " - " + format(cdsStart )); + logger.debug(" <- YYY found position noncoding exon: #" + (i+1) + " " + format(chromPos) + " s:" + format(start) + " tmp: " + format(tmp) + " cdsStart" + format(cdsStart) + " cdl:" + codingLength + " " + format(cdsEnd)); + + return codingLength + tmp; + } + + + logger.debug(" codinglength " + codingLength + " + " + (end - cdsStart) ); + codingLength += (end - cdsStart+1); + + logger.debug(" <- Exon : " + format(cdsStart+1) + " - " + format(end) + " | " + format(end - cdsStart) + " | " + codingLength + " | " + (codingLength % 3)); + logger.debug(" UTR : " + format(start+1) + " - " + format(cdsStart )); + + } else if (inCoding) { + // standard coding exon + // if (codingLength + end - start >= chromPos) { + if ( chromPos >= start+1 && chromPos <= end) { + + int tmp = end -chromPos ; + if ( tmp > (end-start+1)) { + + tmp = (end - start+1); + + logger.debug("Adjusting tmp to " + tmp); + } + + + logger.debug(" -> found position in reverse coding exon: #" + (i+1) + " chromPos:" + format(chromPos) + " start:" + format(start+1) + " end:" + format(end) + " tmp:" + tmp + " cdsStart:" + cdsStart + " codingLength:" + codingLength); + + return codingLength+tmp; + } + + // full exon is coding + + logger.debug(" codinglength " + codingLength + " + " + (end - start) ); + // don't add 1 + codingLength += (end - start); + + logger.debug(" Exon : " + format(start+1) + " - " + format(end) + " | " + format(end - start) + " | " + codingLength + " | " + (codingLength % 3)); + } else { + // e.g. see UBQLN3 + + logger.debug(" no translation! cdl:" + codingLength); + + } + + //if ( inCoding ) + // logger.debug(" exon phase at end:" + ((codingLength) % 3)); + + logger.debug(" coding length: " + codingLength + "(phase:" + (codingLength % 3) + ") CDS POS trying to map:" + chromPos); + + + } + + logger.debug("length exons: " + lengthExons); + // could not map, or map over the full length?? + + + return -1; + + } + + /** + * Get the chromosome position mapped onto the mrna CDS transcript position (needs to be divided by 3 to get protein coordinate) + * + * @param exonStarts + * @param exonEnds + * @param cdsStart + * @param cdsEnd + * @return + */ + public static int getCDSPosForward(int chromPos, List exonStarts, List exonEnds, + int cdsStart, int cdsEnd) { + boolean inCoding = false; + int codingLength = 0; + + + logger.debug("looking for CDS position for " +chromPos); + + int lengthExons = 0; + // map forward + for (int i = 0; i < exonStarts.size(); i++) { + + + // start can include UTR + int start = exonStarts.get(i); + int end = exonEnds.get(i); + + lengthExons += end - start; + + + logger.debug("forward exon: " + (start+1) + " - " + end + " | " + (end - start) + " ? overlaps with " + format(chromPos)); + + if (start +1 <= cdsStart +1 && end >= cdsStart+1) { + + if (end >= chromPos) { + // we are reaching our target position + // -1 is important here... + int tmp = chromPos - cdsStart -1; + + + logger.debug("cdl:" + codingLength + " | " + tmp); + logger.debug(" -> found position in UTR exon: " + chromPos + " " + format(start) + " " + format(tmp) + " " + cdsStart + " " + codingLength); + + return codingLength + tmp; + } + inCoding = true; + codingLength += (end - cdsStart); + + logger.debug(" UTR : " + format(start) + " - " + (cdsStart )); + logger.debug(" -> Exon : " + format(cdsStart+1) + " - " + format(end) + " | " + format(end - cdsStart) + " | " + codingLength + " | " + (codingLength % 3)); + + } else if (start <= cdsEnd && end >= cdsEnd) { + //logger.debug(" <-- CDS end at: " + cdsEnd ); + inCoding = false; + if (cdsEnd >= chromPos && (start +1 <= chromPos)) { + int tmp = chromPos - start -1 ; + + + logger.debug(" -> cdsForward found position in non coding exon#"+i+": " + chromPos + " " + format(start+1) + " " + format(tmp) + " " + cdsStart + " " + codingLength); + return codingLength + tmp ; + } + codingLength += (cdsEnd - start); + + logger.debug(" <- Exon : " + format(start+1) + " - " + format(cdsEnd) + " | " + format(cdsEnd - start+1) + " | " + codingLength + " | " + (codingLength % 3)); + logger.debug(" UTR : " + format(cdsEnd + 1) + " - " + format(end)); + + + } else if (inCoding) { + + if (end >= chromPos && (start +1 <=chromPos)) { + + int tmp = chromPos-start-1 ; + + + logger.debug(codingLength + " | " + tmp); + logger.debug(" -> found position in coding exon #" + (i + 1) + ": " + format(chromPos) + " " + format(start + 1) + " " + format(tmp) + " " + cdsStart + " " + codingLength); + + + return codingLength + tmp ; + } + // full exon is coding + codingLength += (end - start); + + logger.debug(" Exon :" + format(start) + " - " + format(end) + " | " + format(end - start) + " | " + codingLength + " | " + (codingLength % 3)); + } + // if ( inCoding ) + // logger.debug("exon phase at end:" + (codingLength % 3)); + // + // logger.debug(" coding length: " + codingLength); + + + } + + //logger.debug("length exons: " + lengthExons); + //return codingLength - 3; + + // could not map! + + return -1; + } + + +} diff --git a/biojava-genome/src/test/java/org/biojava/nbio/genome/TestGenomeMapping.java b/biojava-genome/src/test/java/org/biojava/nbio/genome/TestGenomeMapping.java new file mode 100644 index 0000000000..b4615b9db2 --- /dev/null +++ b/biojava-genome/src/test/java/org/biojava/nbio/genome/TestGenomeMapping.java @@ -0,0 +1,151 @@ +package org.biojava.nbio.genome; + +import com.google.common.collect.Lists; +import com.google.common.collect.Range; +import junit.framework.TestCase; +import org.biojava.nbio.genome.parsers.genename.GeneChromosomePosition; +import org.biojava.nbio.genome.parsers.genename.GeneChromosomePositionParser; +import org.biojava.nbio.genome.util.ChromosomeMappingTools; +import org.junit.Test; + +import java.io.InputStream; +import java.net.URL; +import java.util.List; +import java.util.zip.GZIPInputStream; + +/** + * Created by andreas on 7/19/16. + */ +public class TestGenomeMapping extends TestCase{ + + private static final String geneChromosomeFile = "http://cdn.rcsb.org/gene/hg38/geneChromosome38.tsf.gz"; + + private List gcps = null; + + @Override + protected void setUp() throws Exception { + super.setUp(); + InputStream input = new GZIPInputStream(new URL(geneChromosomeFile).openStream()); + gcps = GeneChromosomePositionParser.getChromosomeMappings(input); + + + } + + + @Test + public void testAK1() { + String geneName = "AK1"; + + assertNotNull(gcps); + assertTrue("Problems with downloading refFlat file from UCSC browser ", gcps.size() > 100); + + int uniProtLength = 194; + + try { + + for (GeneChromosomePosition pos : gcps) { + + //System.out.println(pos.getGeneName()); + if (!pos.getGeneName().equals(geneName)) + continue; + + /// there are three alternative transcripts for AK1. + // we are just testing one here: + + if ( ! pos.getGenebankId().equals("NM_000476")) + continue; + + assertTrue(pos.getGeneName().equals(geneName)); + assertTrue(pos.getOrientation().equals('-')); + assertTrue(pos.getChromosome().equals("chr9")); + + List> cdsranges = ChromosomeMappingTools.getCDSExonRanges(pos); + + validateExon(0,0,7, cdsranges ); + validateExon(1,7,43, cdsranges ); + validateExon(2,43,207, cdsranges ); + validateExon(3,207,324, cdsranges ); + validateExon(4,324,516, cdsranges ); + validateExon(5,516,585, cdsranges ); + + + int cdslength = ChromosomeMappingTools.getCDSLength(pos); + + assertTrue("CDS length should be 582, but is " + cdslength, cdslength == (uniProtLength *3)); + + List> chromranges = ChromosomeMappingTools.getChromosomalRangesForCDS(pos); + + // we are reverse strand. reverse the order + chromranges = Lists.reverse(chromranges); + + assertTrue(chromranges.size() == 6); + + // compare with https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA=CCDS6881 + validateExon(0,127868008,127868076, chromranges ); + validateExon(1,127868320,127868512, chromranges ); + validateExon(2,127871822,127871939, chromranges ); + validateExon(3,127872689,127872853, chromranges ); + validateExon(4,127873025,127873061, chromranges ); + validateExon(5,127874610,127874617, chromranges ); + + } + } catch (Exception e) { + fail(e.getMessage()); + } + } + + @Test + public void testHBA(){ + + String geneName = "HBA1"; + assertNotNull(gcps); + + assertTrue("Problems with downloading refFlat file from UCSC browser ", gcps.size() > 100); + + try { + + for ( GeneChromosomePosition pos : gcps){ + + //System.out.println(pos.getGeneName()); + if ( ! pos.getGeneName().equals(geneName)) + continue; + + assertTrue(pos.getGeneName().equals("HBA1")); + assertTrue(pos.getGenebankId().equals("NM_000558")); + assertTrue(pos.getChromosome().equals("chr16")); + assertTrue(pos.getTranscriptionStart().equals(176650)); + assertTrue(pos.getTranscriptionEnd().equals(177522)); + assertTrue(pos.getOrientation().equals('+')); + + List> cdsranges = ChromosomeMappingTools.getCDSExonRanges(pos); + + assertTrue(cdsranges.size() == 3); + + validateExon(0,0,95,cdsranges); + validateExon(1,95,300,cdsranges); + validateExon(2,300,429,cdsranges); + + + List> chromranges = ChromosomeMappingTools.getChromosomalRangesForCDS(pos); + + validateExon(0,176716,176811, chromranges ); + validateExon(1,176928,177133, chromranges ); + validateExon(2,177282,177411, chromranges ); + + + } + } catch (Exception e){ + fail(e.getMessage()); + } + + + } + + private void validateExon(int exonNr, int start, int stop, List> cdsranges) { + + Range exon = cdsranges.get(exonNr); + assertTrue("Exon " + exonNr + " boundary "+ exon.lowerEndpoint() + " does not match " +start , exon.lowerEndpoint().equals(start)); + assertTrue("Exon " + exonNr + " boundary " + exon.upperEndpoint() + " does not match " + stop, exon.upperEndpoint().equals(stop)); + + } +} diff --git a/biojava-integrationtest/pom.xml b/biojava-integrationtest/pom.xml index 61f9a178e4..c10746a76e 100644 --- a/biojava-integrationtest/pom.xml +++ b/biojava-integrationtest/pom.xml @@ -4,7 +4,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 biojava-integrationtest jar @@ -32,7 +32,7 @@ org.biojava biojava-structure - 4.2.3 + 4.2.4 diff --git a/biojava-modfinder/pom.xml b/biojava-modfinder/pom.xml index 0c8c9b759f..4551514d9a 100644 --- a/biojava-modfinder/pom.xml +++ b/biojava-modfinder/pom.xml @@ -4,7 +4,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 biojava-modfinder biojava-modfinder @@ -31,7 +31,7 @@ org.biojava biojava-structure - 4.2.3 + 4.2.4 jar compile diff --git a/biojava-ontology/pom.xml b/biojava-ontology/pom.xml index b0f4378c55..8e0ccad72f 100644 --- a/biojava-ontology/pom.xml +++ b/biojava-ontology/pom.xml @@ -4,7 +4,7 @@ org.biojava biojava - 4.2.3 + 4.2.4 biojava-ontology diff --git a/biojava-phylo/pom.xml b/biojava-phylo/pom.xml index 96be175f15..e7758ae1ec 100644 --- a/biojava-phylo/pom.xml +++ b/biojava-phylo/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 4.0.0 biojava-phylo @@ -44,7 +44,7 @@ org.biojava biojava-core - 4.2.3 + 4.2.4 compile diff --git a/biojava-protein-disorder/pom.xml b/biojava-protein-disorder/pom.xml index cd338b3b8f..535e7095ff 100644 --- a/biojava-protein-disorder/pom.xml +++ b/biojava-protein-disorder/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 biojava-protein-disorder jar @@ -63,7 +63,7 @@ org.biojava biojava-core - 4.2.3 + 4.2.4 diff --git a/biojava-sequencing/pom.xml b/biojava-sequencing/pom.xml index 374ea82c69..461bddf663 100644 --- a/biojava-sequencing/pom.xml +++ b/biojava-sequencing/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 4.0.0 biojava-sequencing @@ -47,7 +47,7 @@ org.biojava biojava-core - 4.2.3 + 4.2.4 compile diff --git a/biojava-structure-gui/pom.xml b/biojava-structure-gui/pom.xml index 290c5a3a55..df3eb75ffe 100644 --- a/biojava-structure-gui/pom.xml +++ b/biojava-structure-gui/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 4.0.0 biojava-structure-gui @@ -25,13 +25,13 @@ org.biojava biojava-structure - 4.2.3 + 4.2.4 compile org.biojava biojava-core - 4.2.3 + 4.2.4 compile diff --git a/biojava-structure/pom.xml b/biojava-structure/pom.xml index ddee49b548..c59b4e1068 100644 --- a/biojava-structure/pom.xml +++ b/biojava-structure/pom.xml @@ -4,7 +4,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 biojava-structure biojava-structure @@ -22,13 +22,13 @@ org.biojava biojava-alignment - 4.2.3 + 4.2.4 compile org.biojava biojava-core - 4.2.3 + 4.2.4 compile diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/AminoAcidImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/AminoAcidImpl.java index 5e20e9be4a..ab9c576210 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/AminoAcidImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/AminoAcidImpl.java @@ -183,6 +183,10 @@ public Object clone() { n.addAltLoc(nAltLocGroup); } } + + if (chemComp!=null) + n.setChemComp(chemComp); + return n; } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java index 076bd86f0d..6039cb8f32 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java @@ -88,7 +88,7 @@ public static enum PerformanceBehavior { private Map atomNameLookup; - private ChemComp chemComp ; + protected ChemComp chemComp ; private List altLocs; @@ -402,6 +402,9 @@ public Object clone() { n.addAltLoc(nAltLocGroup); } } + + if (chemComp!=null) + n.setChemComp(chemComp); return n; } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/NucleotideImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/NucleotideImpl.java index 1616de1ef0..7cc5a9400b 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/NucleotideImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/NucleotideImpl.java @@ -27,10 +27,9 @@ /** - * - * A nucleotide group is almost the same as a Hetatm group. - * @see HetatomImpl - * @see AminoAcidImpl + * A nucleotide group is almost the same as a Hetatm group. + * @see HetatomImpl + * @see AminoAcidImpl * @author Andreas Prlic * @since 1.4 * @version %I% %G% @@ -94,29 +93,5 @@ public Atom getP() { } - @Override - public Object clone(){ - NucleotideImpl n = new NucleotideImpl(); - - n.setPDBFlag(has3D()); - n.setResidueNumber(getResidueNumber()); - - n.setPDBName(getPDBName()); - - // copy the atoms - for (Atom atom1 : atoms) { - Atom atom = (Atom) atom1.clone(); - n.addAtom(atom); - atom.setGroup(n); - } - - // copying the alt loc groups if present, otherwise they stay null - if (getAltLocs()!=null && !getAltLocs().isEmpty()) { - for (Group altLocGroup:this.getAltLocs()) { - Group nAltLocGroup = (Group)altLocGroup.clone(); - n.addAltLoc(nAltLocGroup); - } - } - return n; - } + // no need to implement clone here, it's already in super class } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/AFPTwister.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/AFPTwister.java index 96d9da57ba..43f32085c9 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/AFPTwister.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/AFPTwister.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCat.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCat.java index ad9dd1615d..6a5aa6dc93 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCat.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCat.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCatFlexible.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCatFlexible.java index 3379895038..b4d6479620 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCatFlexible.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCatFlexible.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCatRigid.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCatRigid.java index 62a5735d16..d6fc87a506 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCatRigid.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/FatCatRigid.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPCalculator.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPCalculator.java index a89500596c..51c51f8c17 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPCalculator.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPCalculator.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPChainer.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPChainer.java index 9344c9c255..3db4193ac4 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPChainer.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPChainer.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPOptimizer.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPOptimizer.java index 8e276afa0a..ca38425aa9 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPOptimizer.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPOptimizer.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPPostProcessor.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPPostProcessor.java index 60e5a05c33..5f940ec6a1 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPPostProcessor.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/AFPPostProcessor.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FCAlignHelper.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FCAlignHelper.java index 08a81eebb4..74c6d66ce5 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FCAlignHelper.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FCAlignHelper.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FatCatAligner.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FatCatAligner.java index e8546908f8..b4200cd88f 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FatCatAligner.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FatCatAligner.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FatCatParameters.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FatCatParameters.java index 8e9dd2e8f9..cb5ae3628b 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FatCatParameters.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/FatCatParameters.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/SigEva.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/SigEva.java index 351d531d07..d111e55fd6 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/SigEva.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/SigEva.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/StructureAlignmentOptimizer.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/StructureAlignmentOptimizer.java index dedeaabad8..2908f91e42 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/StructureAlignmentOptimizer.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/align/fatcat/calc/StructureAlignmentOptimizer.java @@ -3,7 +3,7 @@ * Yuzhen Ye & Adam Godzik (2003) * Flexible structure alignment by chaining aligned fragment pairs allowing twists. * Bioinformatics vol.19 suppl. 2. ii246-ii255. - * http://www.ncbi.nlm.nih.gov/pubmed/14534198 + * https://www.ncbi.nlm.nih.gov/pubmed/14534198 * * * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathFactory.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathFactory.java index d80664affa..1b185fc01b 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathFactory.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathFactory.java @@ -35,11 +35,12 @@ */ public class CathFactory { - public static final String VERSION_3_5_0 = "3.5.0"; - public static final String VERSION_4_0_0 = "4.0.0"; - public static final String LATEST_VERSION = VERSION_4_0_0; + public static final String VERSION_3_5_0 = "3_5_0"; + public static final String VERSION_4_0_0 = "4_0_0"; + public static final String VERSION_4_1_0 = "4_1_0"; + public static final String LATEST_VERSION = VERSION_4_1_0; - public static String DEFAULT_VERSION = LATEST_VERSION; + public static final String DEFAULT_VERSION = LATEST_VERSION; private static CathDatabase cath; diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java index 77346f0cc6..41ae6b014f 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java @@ -25,6 +25,8 @@ import org.biojava.nbio.structure.align.util.UserConfiguration; import org.biojava.nbio.structure.io.util.FileDownloadUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.biojava.nbio.core.util.InputStreamProvider; import java.io.*; @@ -41,41 +43,42 @@ */ public class CathInstallation implements CathDatabase{ + private static final Logger LOGGER = LoggerFactory.getLogger(CathInstallation.class); + public static final String DEFAULT_VERSION = CathFactory.DEFAULT_VERSION; - String cathVersion; + public static final String domainListFileName = "cath-domain-list-v%s.txt"; + public static final String domainDescriptionFileName = "cath-domain-description-file-v%s.txt"; + public static final String nodeListFileName = "cath-names-v%s.txt"; + public static final String domallFileName = "cath-domain-boundaries-v%s.txt"; + + public static final String CATH_DOWNLOAD_URL = "http://download.cathdb.info/cath/releases/"; + public static final String CATH_DOWNLOAD_LATEST_RELEASE_DIR = "latest-release"; + public static final String CATH_DOWNLOAD_PREV_RELEASE_DIR = "previous-releases"; + public static final String CATH_DOWNLOAD_CLASSIFICATION_DATA_DIR = "cath-classification-data"; - public static final String domainListFileName = "CathDomainList"; - public static final String domainDescriptionFileName = "CathDomainDescriptionFile"; - public static final String nodeListFileName = "CathNames"; - public static final String domallFileName = "CathDomall"; + public static final String NEWLINE = System.getProperty("line.separator");; + public static final String FILESPLIT = System.getProperty("file.separator");; - public static final String CATH_DOWNLOAD = "http://release.cathdb.info/"; - - String cathDownloadUrl; - - public static final String NEWLINE; - public static final String FILESPLIT ; - - static { - NEWLINE = System.getProperty("line.separator"); - FILESPLIT = System.getProperty("file.separator"); - } + + private String cathVersion; + private String cathDownloadUrl; + private String cacheLocation ; - AtomicBoolean installedDomainList; - AtomicBoolean installedDomainDescription; - AtomicBoolean installedNodeList; - AtomicBoolean installedDomall; + private AtomicBoolean installedDomainList; + private AtomicBoolean installedDomainDescription; + private AtomicBoolean installedNodeList; + private AtomicBoolean installedDomall; - final boolean useCathDomainDescriptionFile; - final boolean parseCathFragments; + private final boolean useCathDomainDescriptionFile; + private final boolean parseCathFragments; - Map> pdbMap; - Map domainMap; - Map cathTree; - Map> fragmentMap; + private Map> pdbMap; + private Map domainMap; + private Map cathTree; + private Map> fragmentMap; @@ -91,7 +94,7 @@ public CathInstallation(String cacheLocation, boolean usingCDDF, boolean parseCF installedDomall = new AtomicBoolean(false); cathVersion = DEFAULT_VERSION; - cathDownloadUrl = CATH_DOWNLOAD; + cathDownloadUrl = CATH_DOWNLOAD_URL; pdbMap = new HashMap>(); domainMap = new HashMap(); @@ -110,19 +113,32 @@ public CathInstallation() { } public String getDomainListFileName() { - return cacheLocation + domainListFileName + ".v" + cathVersion; + return cacheLocation + buildFileName(domainListFileName); } public String getDomainDescriptionFileName() { - return cacheLocation + domainDescriptionFileName + ".v" + cathVersion; + return cacheLocation + buildFileName(domainDescriptionFileName); } public String getNodeListFileName() { - return cacheLocation + nodeListFileName + ".v" + cathVersion; + return cacheLocation + buildFileName(nodeListFileName); } public String getDomallFileName() { - return cacheLocation + domallFileName + ".v" + cathVersion; + return cacheLocation + buildFileName(domallFileName); + } + + private String buildFileName(String fileNameTemplate) { + return String.format(fileNameTemplate, cathVersion); + } + + private String buildUrl(String remoteFileName) { + String remoteFileNameWithVer = buildFileName(remoteFileName); + String releasesDir = CATH_DOWNLOAD_LATEST_RELEASE_DIR; + if (!cathVersion.equals(CathFactory.LATEST_VERSION)) { + releasesDir = CATH_DOWNLOAD_PREV_RELEASE_DIR; + } + return cathDownloadUrl + releasesDir + "/v" + cathVersion + "/" + CATH_DOWNLOAD_CLASSIFICATION_DATA_DIR + "/" + remoteFileNameWithVer; } public String getCathDownloadUrl() { @@ -418,7 +434,7 @@ private void parseCathDomainDescriptionFile(BufferedReader bufferedReader) throw try { cathDescription.setDate( dateFormat.parse( line.substring(10) ) ); } catch (ParseException e) { - e.printStackTrace(); + LOGGER.error(e.getMessage(), e); } } else if ( line.startsWith("NAME") ) { name.append( line.substring(10) ); @@ -622,7 +638,7 @@ private void parseCathDomall(BufferedReader bufferedReader) throws IOException{ } } - protected void downloadFileFromRemote(URL remoteURL, File localFile) throws FileNotFoundException, IOException{ + protected void downloadFileFromRemote(URL remoteURL, File localFile) throws IOException{ // System.out.println("downloading " + remoteURL + " to: " + localFile); long timeS = System.currentTimeMillis(); @@ -653,7 +669,7 @@ protected void downloadFileFromRemote(URL remoteURL, File localFile) throws File disp = disp / 1024.0; } long timeE = System.currentTimeMillis(); - System.out.println("downloaded " + String.format("%.1f",disp) + unit + " in " + (timeE - timeS)/1000 + " sec."); + LOGGER.info("Downloaded file {} ({}) to local file {} in {} sec.", remoteURL, String.format("%.1f",disp) + unit, localFile, (timeE - timeS)/1000); } private boolean domainDescriptionFileAvailable(){ @@ -680,25 +696,25 @@ private boolean domallFileAvailable() { return f.exists(); } - protected void downloadDomainListFile() throws FileNotFoundException, IOException{ + protected void downloadDomainListFile() throws IOException{ String remoteFilename = domainListFileName; - URL url = new URL(cathDownloadUrl + "v" + cathVersion + "/" + remoteFilename); + URL url = new URL(buildUrl(remoteFilename)); String localFileName = getDomainListFileName(); File localFile = new File(localFileName); downloadFileFromRemote(url, localFile); } - protected void downloadDomainDescriptionFile() throws FileNotFoundException, IOException{ + protected void downloadDomainDescriptionFile() throws IOException{ String remoteFilename = domainDescriptionFileName; - URL url = new URL(cathDownloadUrl + "v" + cathVersion + "/" + remoteFilename); + URL url = new URL(buildUrl(remoteFilename)); String localFileName = getDomainDescriptionFileName(); File localFile = new File(localFileName); downloadFileFromRemote(url, localFile); } - protected void downloadNodeListFile() throws FileNotFoundException, IOException{ + protected void downloadNodeListFile() throws IOException{ String remoteFilename = nodeListFileName; - URL url = new URL(cathDownloadUrl + "v" + cathVersion + "/" + remoteFilename); + URL url = new URL(buildUrl(remoteFilename)); String localFileName = getNodeListFileName(); File localFile = new File(localFileName); downloadFileFromRemote(url, localFile); @@ -706,7 +722,7 @@ protected void downloadNodeListFile() throws FileNotFoundException, IOException{ protected void downloadDomallFile() throws IOException { String remoteFileName = domallFileName; - URL url = new URL(cathDownloadUrl + "v" + cathVersion + "/" + remoteFileName); + URL url = new URL(buildUrl(remoteFileName)); String localFileName = getDomallFileName(); File localFile = new File(localFileName); downloadFileFromRemote(url, localFile); @@ -719,7 +735,7 @@ public void ensureDomainListInstalled(){ try { downloadDomainListFile(); } catch (Exception e){ - e.printStackTrace(); + LOGGER.error("Could not download CATH domain list file. Error: {}", e.getMessage()); installedDomainList.set(false); return; } @@ -728,7 +744,7 @@ public void ensureDomainListInstalled(){ try { parseCathDomainList(); } catch (Exception e){ - e.printStackTrace(); + LOGGER.error(e.getMessage(), e); installedDomainList.set(false); return; } @@ -742,7 +758,7 @@ public void ensureDomainDescriptionInstalled(){ try { downloadDomainDescriptionFile(); } catch (Exception e){ - e.printStackTrace(); + LOGGER.error("Could not download CATH domain description file. Error: {}", e.getMessage()); installedDomainDescription.set(false); return; } @@ -751,7 +767,7 @@ public void ensureDomainDescriptionInstalled(){ try { parseCathDomainDescriptionFile(); } catch (Exception e){ - e.printStackTrace(); + LOGGER.error(e.getMessage(), e); installedDomainDescription.set(false); return; } @@ -765,7 +781,7 @@ public void ensureNodeListInstalled(){ try { downloadNodeListFile(); } catch (Exception e){ - e.printStackTrace(); + LOGGER.error("Could not download CATH node list file. Error: {}", e.getMessage()); installedNodeList.set(false); return; } @@ -774,7 +790,7 @@ public void ensureNodeListInstalled(){ try { parseCathNames(); } catch (Exception e){ - e.printStackTrace(); + LOGGER.error(e.getMessage(), e); installedNodeList.set(false); return; } @@ -795,7 +811,7 @@ public void ensureDomallInstalled() { try { downloadDomallFile(); } catch (Exception e) { - e.printStackTrace(); + LOGGER.error("Could not download CATH domain all file. Error: {}", e.getMessage()); installedDomall.set(false); return; } @@ -804,7 +820,7 @@ public void ensureDomallInstalled() { try { parseCathDomall(); } catch (Exception e) { - e.printStackTrace(); + LOGGER.error(e.getMessage(), e); installedDomall.set(false); return; } @@ -814,5 +830,6 @@ public void ensureDomallInstalled() { public void setCathVersion(String cathVersion) { this.cathVersion = cathVersion; } + } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/ecod/EcodDomain.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/ecod/EcodDomain.java index d0a3ec3308..1b880feef4 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/ecod/EcodDomain.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/ecod/EcodDomain.java @@ -34,7 +34,7 @@ * chain, residue ranges and status (manual or automatic classification). *

* For detailed explanation about the ECOD information see the original article - * at: http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4256011. + * at: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4256011. *

  * Cheng H, Schaeffer RD, Liao Y, et al. 
  * ECOD: An Evolutionary Classification of Protein Domains. 
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/MMCIFFileTools.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/MMCIFFileTools.java
index e5ecbb2455..f483cb23d5 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/MMCIFFileTools.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/MMCIFFileTools.java
@@ -511,7 +511,7 @@ public static List convertStructureToAtomSites(Structure s) {
 		List list = new ArrayList();
 
 		for (int m=0;mProtein Structure Comparison Tool V ${project.version} \
       Prlić A, Bliven S, Rose PW, Bluhm WF, Bizon C, Godzik A, Bourne PE.
\ Pre-calculated protein structure alignments at the RCSB PDB website
\ Bioinformatics (2010) 26 (23): 2983-2985
\ - [PubMed]<\ + [PubMed]<\ [pdf]
\
\ CE Reference:
\ Shindyalov IN, Bourne PE (1998)
\ Protein structure alignment by incremental combinatorial extension (CE) of the optimal path.
\ Protein Eng 11: 739-747
\ - [PubMed]\ + [PubMed]\ [pdf]
\
\ FATCAT Reference:
\ @@ -35,7 +35,7 @@ ce.about=

Protein Structure Comparison Tool V ${project.version}

\ BioJava: an open-source framework for bioinformatics in 2012
\ Bioinformatics (2012) 28 (20): 2693-2695
\ [BioJava website]\ - [PubMed]\ + [PubMed]\
\ Jmol web site:\ http://www.jmol.org\ diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestMMCIFWriting.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestMMCIFWriting.java index 5fda97a2af..c500bba1c0 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestMMCIFWriting.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestMMCIFWriting.java @@ -28,10 +28,12 @@ import java.io.FileWriter; import java.io.IOException; +import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.Chain; import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.StructureIO; +import org.biojava.nbio.structure.StructureTools; import org.biojava.nbio.structure.align.util.AtomCache; import org.biojava.nbio.structure.io.mmcif.MMCIFFileTools; import org.biojava.nbio.structure.io.mmcif.MMcifParser; @@ -45,59 +47,8 @@ public class TestMMCIFWriting { @Test public void test1SMT() throws IOException, StructureException { - AtomCache cache = new AtomCache(); - - StructureIO.setAtomCache(cache); - - cache.setUseMmCif(true); - - FileParsingParameters params = new FileParsingParameters(); - params.setAlignSeqRes(true); - cache.setFileParsingParams(params); - - Structure originalStruct = StructureIO.getStructure("1SMT"); - - File outputFile = File.createTempFile("biojava_testing_", ".cif"); - outputFile.deleteOnExit(); - - FileWriter fw = new FileWriter(outputFile); - fw.write(originalStruct.toMMCIF()); - fw.close(); - - - MMcifParser parser = new SimpleMMcifParser(); - - SimpleMMcifConsumer consumer = new SimpleMMcifConsumer(); - - FileParsingParameters fileParsingParams = new FileParsingParameters(); - fileParsingParams.setAlignSeqRes(true); - - consumer.setFileParsingParameters(fileParsingParams); - - parser.addMMcifConsumer(consumer); - - //parser.parse(new BufferedReader(new FileReader(new File("/home/duarte_j/test.cif")))); - parser.parse(new BufferedReader(new FileReader(outputFile))); - - Structure readStruct = consumer.getStructure(); - - assertNotNull(readStruct); - - assertEquals(originalStruct.getChains().size(), readStruct.getChains().size()); - - for (int i=0;i org.biojava biojava - 4.2.3 + 4.2.4 biojava-survival diff --git a/biojava-ws/pom.xml b/biojava-ws/pom.xml index ecc74b660e..f6ef1afbfa 100644 --- a/biojava-ws/pom.xml +++ b/biojava-ws/pom.xml @@ -3,7 +3,7 @@ biojava org.biojava - 4.2.3 + 4.2.4 biojava-ws biojava-ws @@ -19,7 +19,7 @@ org.biojava biojava-core - 4.2.3 + 4.2.4 compile diff --git a/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/BlastAlignmentParameterEnum.java b/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/BlastAlignmentParameterEnum.java index 4c70b8147f..061475071a 100644 --- a/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/BlastAlignmentParameterEnum.java +++ b/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/BlastAlignmentParameterEnum.java @@ -27,7 +27,7 @@ * Alignment request parameters accepted by QBlast service.
* Not all are mandatory. Certain parameters only work with a subset of other parameters in the list. *

- * Taken from Blast URL API + * Taken from Blast URL API * * @author Gediminas Rimsa */ diff --git a/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/BlastOutputParameterEnum.java b/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/BlastOutputParameterEnum.java index d0dc973f38..dfa144864d 100644 --- a/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/BlastOutputParameterEnum.java +++ b/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/BlastOutputParameterEnum.java @@ -27,7 +27,7 @@ * Output parameters accepted by QBlast service.
* Not all are mandatory. Certain parameters only work with a subset of other parameters in the list. *

- * Taken from Blast URL API + * Taken from Blast URL API * * @author Gediminas Rimsa */ diff --git a/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/NCBIQBlastAlignmentProperties.java b/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/NCBIQBlastAlignmentProperties.java index 2e4c47cbdd..3d9ad05377 100644 --- a/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/NCBIQBlastAlignmentProperties.java +++ b/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/NCBIQBlastAlignmentProperties.java @@ -208,7 +208,7 @@ public int getBlastWordSize() { * WARNING!! At this point, the method does not verify the validity of your choice; for example, word size of * greater than 5 with blastp returns error messages from QBlast. Word size range depends on the algorithm chosen. *

- * More at http://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/new/node74.html + * More at https://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/new/node74.html *

* Blastall equivalent: -W * diff --git a/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/NCBIQBlastService.java b/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/NCBIQBlastService.java index 12538465bf..a57d0170dd 100644 --- a/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/NCBIQBlastService.java +++ b/biojava-ws/src/main/java/org/biojava/nbio/ws/alignment/qblast/NCBIQBlastService.java @@ -64,9 +64,9 @@ public class NCBIQBlastService implements RemotePairwiseAlignmentService { public static final long WAIT_INCREMENT = 3000; private static final MapToStringTransformer MAP_TO_STRING_TRANSFORMER = new MapToStringTransformer(); - private static final String SERVICE_URL = "http://blast.ncbi.nlm.nih.gov/Blast.cgi"; + private static final String SERVICE_URL = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"; private static final String DEFAULT_EMAIL = "anonymous@biojava.org"; - private static final String DEFAULT_TOOL = "biojava3"; + private static final String DEFAULT_TOOL = "biojava5"; private URL serviceUrl; private String email = DEFAULT_EMAIL; diff --git a/pom.xml b/pom.xml index 2b575db99b..3aa5456d2e 100644 --- a/pom.xml +++ b/pom.xml @@ -12,7 +12,7 @@ org.biojava biojava pom - 4.2.3 + 4.2.4 biojava BioJava is an open-source project dedicated to providing a Java framework for processing biological data. It provides analytical and statistical routines, parsers for common file formats and allows the @@ -44,7 +44,7 @@ scm:git:git@github.com:biojava/biojava.git https://github.com/biojava/biojava - biojava-4.2.3 + biojava-4.2.4