Skip to content

Commit ed7ab8f

Browse files
committed
Merge branch 'master' into phylo
2 parents 4cac1d5 + 8676f44 commit ed7ab8f

File tree

166 files changed

+6793
-4176
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

166 files changed

+6793
-4176
lines changed

biojava-aa-prop/pom.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@
99
<name>biojava-aa-prop</name>
1010
<url>http://www.biojava.org</url>
1111

12+
<licenses>
13+
<license>
14+
<name>GNU LGPL v2</name>
15+
<url>http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt</url>
16+
<distribution>repo</distribution>
17+
</license>
18+
</licenses>
19+
1220
<build>
1321
<plugins>
1422
<plugin>

biojava-alignment/pom.xml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,15 @@
99
<artifactId>biojava-alignment</artifactId>
1010
<name>biojava-alignment</name>
1111

12-
<url>http://maven.apache.org</url>
12+
<licenses>
13+
<license>
14+
<name>GNU LGPL v2</name>
15+
<url>http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt</url>
16+
<distribution>repo</distribution>
17+
</license>
18+
</licenses>
19+
20+
<url>http://www.biojava.org</url>
1321
<properties>
1422
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
1523
</properties>
@@ -43,7 +51,6 @@
4351
<dependency>
4452
<groupId>org.biojava.thirdparty</groupId>
4553
<artifactId>forester</artifactId>
46-
<version>1.005</version>
4754
</dependency>
4855
<!-- logging dependencies (managed by parent pom, don't set versions or scopes here) -->
4956
<dependency>

biojava-core/pom.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@
1313
<name>BioJava</name>
1414
<url>http://www.biojava.org</url>
1515
</organization>
16+
17+
<licenses>
18+
<license>
19+
<name>GNU LGPL v2</name>
20+
<url>http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt</url>
21+
<distribution>repo</distribution>
22+
</license>
23+
</licenses>
24+
1625
<build>
1726
<plugins>
1827
<plugin>
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
package demo;
2+
3+
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
4+
import org.biojava.nbio.core.sequence.DNASequence;
5+
import org.biojava.nbio.core.sequence.ProteinSequence;
6+
import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
7+
import org.biojava.nbio.core.sequence.compound.AmbiguityRNACompoundSet;
8+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
9+
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
10+
import org.biojava.nbio.core.sequence.io.DNASequenceCreator;
11+
import org.biojava.nbio.core.sequence.io.FastaReader;
12+
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
13+
import org.biojava.nbio.core.sequence.template.CompoundSet;
14+
import org.biojava.nbio.core.sequence.template.Sequence;
15+
import org.biojava.nbio.core.sequence.transcription.Frame;
16+
import org.biojava.nbio.core.sequence.transcription.TranscriptionEngine;
17+
18+
import java.io.ByteArrayInputStream;
19+
import java.io.InputStream;
20+
import java.util.LinkedHashMap;
21+
import java.util.Map;
22+
23+
/**
24+
* Created by andreas on 8/10/15.
25+
*/
26+
public class DemoSixFrameTranslation {
27+
28+
public static void main(String[] args){
29+
String dnaFastaS = ">gb:GQ903697|Organism:Arenavirus H0030026 H0030026|Segment:S|Host:Rat\n" +
30+
"CGCACAGAGGATCCTAGGCGTTACTGACTTGCGCTAATAACAGATACTGTTTCATATTTAGATAAAGACC\n" +
31+
"CAGCCAACTGATTGGTCAGCATGGGACAACTTGTGTCCCTCTTCAGTGAAATTCCATCAATCATACACGA\n" +
32+
"AGCTCTCAATGTTGCTCTCGTAGCTGTTAGCATCATTGCAATATTGAAAGGGGTTGTGAATGTTTGGAAG\n" +
33+
"AGTGGAGTTTTGCAGCTTTTGGCCTTCTTGCTCCTGGCGGGAAGATCCTGCTCAGTCATAATTGGTCATC\n" +
34+
"ATCTCGAACTGCAGCATGTGATCTTCAATGGGTCATCAATCACACCCTTTTTACCAGTTACATGTAAGAT\n" +
35+
"CAATGATACCTACTTCCTACTAAGAGGCCCCTATGAAGCTGATTGGGCAGTTGAATTGAGTGTAACTGAA\n" +
36+
"ACCACAGTCTTGGTTGATCTTGAAGGTGGCAGCTCAATGAAGCTGAAAGCCGGAAACATCTCAGGTTGTC\n" +
37+
"TTGGAGACAACCCCCATCTGAGATCAGTGGTCTTCACATTGAATTGGTTGCTAACAGGATTAGATCATGT\n" +
38+
"TATTGATTCTGACCCGAAAATTCTCTGTGATCTTAAAGACAGTGGGCACTTTCGTCTCCAGATGAACTTA\n" +
39+
"ACAGAAAAGCACTATTGTGACAAGTTTCACATCAAAATGGGCAAGGTCTTTGGCGTATTCAAAGATCCGT\n" +
40+
"GCATGGCTGGTGGTAAAATGTTTGCCATACTAAAAAATACCTCTTGGTCGAACCAGTGCCAAGGAAACCA\n" +
41+
"TGTCAGCACCATTCATCTTGTCCTTCAGAGTAATTTCAAACAGGTCCTCAGTAGCAGGAAACTGTTGAAC\n" +
42+
"TTTTTCAGCTGGTCATTGTCTGATGCCACAGGGGCTGATATGCCTGGTGGTTTTTGTCTGGAAAAATGGA\n" +
43+
"TGTTGATTTCAAGTGAACTGAAATGCTTTGGAAACACAGCTGTGGCAAAGTGCAACTTAAATCATGACTC\n" +
44+
"AGAGTTCTGTGACATGCTTAGGCTTTTTGATTTCAACAAAAAGGCAATAGTCACTCTTCAGAACAAAACA\n" +
45+
"AAGCATCGGCTGGACACAGTAATTACTGCTATCAATTCATTGATCTCTGATAATATTCTTATGAAGAACA\n" +
46+
"GGATTAAAGAATTGATAGATGTTCCTTACTGTAATTACACCAAATTTTGGTATGTCAATCACACAGGTCT\n" +
47+
"AAATCTGCACACCCTTCCAAGATGTTGGCTTGTTAAAAATGGTAGCTACTTGAATGTGTCTGACTTCAGG\n" +
48+
"AATGAGTGGATATTGGAGAGTGATCATCTTGTTTCGGAGATCCTTTCAAAGGAGTATGAGGAAAGGCAAA\n" +
49+
"ATCGTACACCACTCTCACTGGTTGACATCTGTTTCTGGAGTACATTGTTTTACACAGCATCAATTTTCCT\n" +
50+
"ACACCTCTTGAGAATTCCAACCCACAGACACATTGTTGGTGAGGGCTGCCCGAAGCCTCATAGGCTAAAC\n" +
51+
"AGGCACTCAATATGTGCTTGTGGCCTTTTCAAACAAGAAGGCAGACCCTTGAGATGGGTAAGAAAGGTGT\n" +
52+
"GAACAATGGTTGCTTGGTGGCCTCCATTGCTGCACCCCCCTAGGGGGGTGCAGCAATGGAGGTTCTCGYT\n" +
53+
"GAGCCTAGAGAACAACTGTTGAATCGGGTTCTCTAAAGAGAACATCGATTGGTAGTACCCTTTTTGGTTT\n" +
54+
"TTCATTGGTCACTGACCCTGAAAGCACAGCACTGAACATCAAACAGTCCAAAAGTGCACAGTGTGCATTT\n" +
55+
"GTTGTGGCTGGTGCTGATCCTTTCTTCTTACTTTTAATGACTATTCCCTTATGTCTGTCACACAGATGTT\n" +
56+
"CAAATCTCTTCCAAACAAGATCTTCAAAGAGCCGTGACTGTTCTGCGGTCAGTTTGACATCAACAATCTT\n" +
57+
"CAAATCCTGTCTTCCATGCATATCAAAGAGCCTCCTAATATCATCAGCACCTTGCGCAGTGAAAACCATG\n" +
58+
"GATTTAGGCAGACTCCTTATTATGCTTGTGATGAGGCCAGGTCGTGCATGTTCAACATCCTTCAGCAATA\n" +
59+
"TCCCATGACAATATTTACTTTGGTCCTTAAAAGATTTTATGTCATTGGGTTTTCTGTAGCAGTGGATGAA\n" +
60+
"TTTTTGTGATTCAGGCTGGTAAATTGCAAACTCAACAGGGTCATGTGGCGGGCCTTCAATGTCAATCCAT\n" +
61+
"GTTGTGTCACTGACCATCAACGACTCTACACTTCTCTTCACCTGAGCCTCCACCTCAGGCTTGAGCGTGG\n" +
62+
"ACAAGAGTGGGGCACCACCGTTCCGGATGGGGACTGGTGTTTTGCTTGGTAAACTCTCAAATTCCACAAC\n" +
63+
"TGTATTGTCCCATGCTCTCCCTTTGATCTGTGATCTTGATGAAATGTAAGGCCAGCCCTCACCAGAGAGA\n" +
64+
"CACACCTTATAAAGTATGTTTTCATAAGGATTCCTCTGTCCTGGTATGGCACTGATGAACATGTTTTCCC\n" +
65+
"TCTTTTTGATCTCCAAGAGGGTTTTTATAATGGTTGTGAATGTGGACTCCTCAATCTTTATTGTTTCCAG\n" +
66+
"CATGTTGCCACCATCAATCAGGCAAGCACCGGCTTTCACAGCAGCTGATAAACTAAGGTTGTAGCCTGAT\n" +
67+
"ATGTTAATTTGAGAATCCTCCTGAGTGATTACCTTTAGAGAAGGATGCTTCTCCATCAAAGCATCTAAGT\n" +
68+
"CACTTAAATTAGGGTATTTTGCTGTGTATAGCAACCCCAGATCTGTGAGGGCCTGAACCACATCATTTAG\n" +
69+
"AGTTTCCCCTCCCTGTTCAGTCATACAGGAAATTGTGAGTGCTGGCATCGATCCAAATTGGTTGATCATA\n" +
70+
"AGTGATGAGTCTTTAACGTCCCAGACTTTGACCACCCCTCCAGTTCTAGCCAACCCAGGTCTCTGAATAC\n" +
71+
"CAACAAGTTGCAGAATTTCGGACCTCCTGGTGAGCTGTGTTGTAGAGAGGTTCCCTAGATACTGGCCACC\n" +
72+
"TGTGGCTGTCAACCTCTCTGTTCTTTGAACTTTTTGCCTTAATTTGTCCAAGTCACTGGAGAGTTCCATT\n" +
73+
"AGCTCTTCCTTTGACAATGATCCTATCTTAAGGAACATGTTCTTTTGGGTTGACTTCATGACCATCAATG\n" +
74+
"AGTCAACTTCCTTATTCAAGTCCCTCAAACTAACAAGATCACTGTCATCTCTTTTAGACCTCCTCATCAT\n" +
75+
"GCGTTGCACACTTGCAACCTTTGAAAAATCTAAGCCGGACAGAAGAGCCCTCGCGTCAGTTAGGACATCT\n" +
76+
"GCCTTAACAGCAGTTGTCCAGTTCGAGAGTCCTCTCCTGAGAGACTGTGTCCATCTGAATGATGGGATTG\n" +
77+
"GTTGTTCGCTCATAGTGATGAAATTGCGCAGAGTTATCCAAAAGCCTAGGATCCTCTGTGCG";
78+
79+
80+
try {
81+
82+
// parse the raw sequence from the string
83+
InputStream stream = new ByteArrayInputStream(dnaFastaS.getBytes());
84+
85+
// define the Ambiguity Compound Sets
86+
AmbiguityDNACompoundSet ambiguityDNACompoundSet = AmbiguityDNACompoundSet.getDNACompoundSet();
87+
CompoundSet<NucleotideCompound> nucleotideCompoundSet = AmbiguityRNACompoundSet.getRNACompoundSet();
88+
89+
FastaReader<DNASequence, NucleotideCompound> proxy =
90+
new FastaReader<DNASequence, NucleotideCompound>(
91+
stream,
92+
new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
93+
new DNASequenceCreator(ambiguityDNACompoundSet));
94+
95+
// has only one entry in this example, but could be easily extended to parse a FASTA file with multiple sequences
96+
LinkedHashMap<String, DNASequence> dnaSequences = proxy.process();
97+
98+
// Initialize the Transcription Engine
99+
TranscriptionEngine engine = new
100+
TranscriptionEngine.Builder().dnaCompounds(ambiguityDNACompoundSet).rnaCompounds(nucleotideCompoundSet).build();
101+
102+
Frame[] sixFrames = Frame.getAllFrames();
103+
104+
105+
106+
for (DNASequence dna : dnaSequences.values()) {
107+
108+
Map<Frame, Sequence<AminoAcidCompound>> results = engine.multipleFrameTranslation(dna, sixFrames);
109+
110+
for (Frame frame : sixFrames){
111+
System.out.println("Translated Frame:" + frame +" : " + results.get(frame));
112+
//System.out.println(dna.getRNASequence(frame).getProteinSequence(engine));
113+
114+
ProteinSequence ps = new ProteinSequence(results.get(frame).getSequenceAsString());
115+
System.out.println(ps);
116+
try {
117+
118+
} catch (Exception e){
119+
System.err.println(e.getMessage() + " when trying to translate frame " + frame);
120+
}
121+
}
122+
123+
}
124+
} catch (Exception e){
125+
e.printStackTrace();
126+
}
127+
128+
129+
}
130+
131+
132+
133+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package demo;
2+
3+
4+
import java.io.File;
5+
import java.io.FileInputStream;
6+
import java.io.IOException;
7+
import java.io.InputStream;
8+
import java.util.LinkedHashMap;
9+
import java.util.logging.Level;
10+
import java.util.logging.Logger;
11+
import org.biojava.nbio.core.sequence.ProteinSequence;
12+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
13+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
14+
import org.biojava.nbio.core.sequence.io.FastaReader;
15+
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
16+
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
17+
import org.biojava.nbio.core.util.InputStreamProvider;
18+
19+
20+
/**
21+
* Created by andreas on 6/17/15.
22+
*/
23+
public class ParseFastaFileDemo {
24+
25+
26+
public ParseFastaFileDemo(){
27+
28+
29+
}
30+
31+
/** e.g. download ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz
32+
* and pass in path to local location of file
33+
*
34+
* @param args
35+
*/
36+
public static void main(String[] args) {
37+
38+
int mb = 1024*1024;
39+
40+
//Getting the runtime reference from system
41+
Runtime runtime = Runtime.getRuntime();
42+
43+
System.out.println("##### Heap utilization statistics [MB] #####");
44+
45+
//Print used memory
46+
System.out.println("Used Memory:"
47+
+ (runtime.totalMemory() - runtime.freeMemory()) / mb);
48+
49+
//Print free memory
50+
System.out.println("Free Memory:"
51+
+ runtime.freeMemory() / mb);
52+
53+
//Print total available memory
54+
System.out.println("Total Memory:" + runtime.totalMemory() / mb);
55+
56+
//Print Maximum available memory
57+
System.out.println("Max Memory:" + runtime.maxMemory() / mb);
58+
59+
60+
if ( args.length < 1) {
61+
System.err.println("First argument needs to be path to fasta file");
62+
return;
63+
}
64+
65+
File f = new File(args[0]);
66+
67+
if ( ! f.exists()) {
68+
System.err.println("File does not exist " + args[0]);
69+
return;
70+
}
71+
72+
long timeS = System.currentTimeMillis();
73+
74+
try {
75+
76+
// automatically uncompress files using InputStreamProvider
77+
InputStreamProvider isp = new InputStreamProvider();
78+
79+
InputStream inStream = isp.getInputStream(f);
80+
81+
82+
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
83+
inStream,
84+
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
85+
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
86+
87+
LinkedHashMap<String, ProteinSequence> b;
88+
89+
int nrSeq = 0;
90+
91+
while ((b = fastaReader.process(100)) != null) {
92+
for (String key : b.keySet()) {
93+
nrSeq++;
94+
//System.out.println(nrSeq + " : " + key + " " + b.get(key));
95+
if ( nrSeq % 100000 == 0)
96+
System.out.println(nrSeq );
97+
}
98+
99+
}
100+
long timeE = System.currentTimeMillis();
101+
System.out.println("parsed a total of " + nrSeq + " TREMBL sequences! in " + (timeE - timeS));
102+
} catch (Exception ex) {
103+
Logger.getLogger(ParseFastaFileDemo.class.getName()).log(Level.SEVERE, null, ex);
104+
}
105+
}
106+
}

biojava-core/src/main/java/org/biojava/nbio/core/search/io/Hit.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
/*
2+
* BioJava development code
3+
*
4+
* This code may be freely distributed and modified under the
5+
* terms of the GNU Lesser General Public Licence. This should
6+
* be distributed with the code. If you do not have a copy,
7+
* see:
8+
*
9+
* http://www.gnu.org/copyleft/lesser.html
10+
*
11+
* Copyright for this code is held jointly by the individual
12+
* authors. These should be listed in @author doc comments.
13+
*
14+
* For more information on the BioJava project and its aims,
15+
* or to join the biojava-l mailing list, visit the home page
16+
* at:
17+
*
18+
* http://www.biojava.org/
19+
*
20+
*/
121
package org.biojava.nbio.core.search.io;
222

323
import java.util.Iterator;

biojava-core/src/main/java/org/biojava/nbio/core/search/io/Hsp.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
/*
2+
* BioJava development code
3+
*
4+
* This code may be freely distributed and modified under the
5+
* terms of the GNU Lesser General Public Licence. This should
6+
* be distributed with the code. If you do not have a copy,
7+
* see:
8+
*
9+
* http://www.gnu.org/copyleft/lesser.html
10+
*
11+
* Copyright for this code is held jointly by the individual
12+
* authors. These should be listed in @author doc comments.
13+
*
14+
* For more information on the BioJava project and its aims,
15+
* or to join the biojava-l mailing list, visit the home page
16+
* at:
17+
*
18+
* http://www.biojava.org/
19+
*
20+
*/
121
package org.biojava.nbio.core.search.io;
222

323
import java.util.ArrayList;

biojava-core/src/main/java/org/biojava/nbio/core/search/io/Result.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
/*
2+
* BioJava development code
3+
*
4+
* This code may be freely distributed and modified under the
5+
* terms of the GNU Lesser General Public Licence. This should
6+
* be distributed with the code. If you do not have a copy,
7+
* see:
8+
*
9+
* http://www.gnu.org/copyleft/lesser.html
10+
*
11+
* Copyright for this code is held jointly by the individual
12+
* authors. These should be listed in @author doc comments.
13+
*
14+
* For more information on the BioJava project and its aims,
15+
* or to join the biojava-l mailing list, visit the home page
16+
* at:
17+
*
18+
* http://www.biojava.org/
19+
*
20+
*/
121
package org.biojava.nbio.core.search.io;
222

323
import java.util.HashMap;

biojava-core/src/main/java/org/biojava/nbio/core/search/io/ResultFactory.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
/*
2+
* BioJava development code
3+
*
4+
* This code may be freely distributed and modified under the
5+
* terms of the GNU Lesser General Public Licence. This should
6+
* be distributed with the code. If you do not have a copy,
7+
* see:
8+
*
9+
* http://www.gnu.org/copyleft/lesser.html
10+
*
11+
* Copyright for this code is held jointly by the individual
12+
* authors. These should be listed in @author doc comments.
13+
*
14+
* For more information on the BioJava project and its aims,
15+
* or to join the biojava-l mailing list, visit the home page
16+
* at:
17+
*
18+
* http://www.biojava.org/
19+
*
20+
*/
121
package org.biojava.nbio.core.search.io;
222

323
import java.io.File;

0 commit comments

Comments
 (0)