Skip to content

Commit 83ef906

Browse files
committed
Merge branch 'master' of github.com:biojava/biojava into mmtf
2 parents c6dddfd + 0c99ed0 commit 83ef906

File tree

72 files changed

+5229
-188
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+5229
-188
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
language: java
22
jdk:
33
- oraclejdk8
4-
sudo: false
4+
sudo: required
55
cache:
66
directories:
77
- "$HOME/.m2"

biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/FastaReader.java

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -188,43 +188,36 @@ public LinkedHashMap<String,S> process(int max) throws IOException {
188188
line = br.readLine();
189189

190190
if (line == null) {
191-
192-
193-
// Fix for #282
194-
if ( sequences.size() == 0 && max != -1) {
195-
return null;
196-
}
197-
198191
//i.e. EOF
199-
String seq = sb.toString();
200-
if ( seq.length() == 0) {
192+
if ( sb.length() == 0 && header.length() != 0 ) {
201193
logger.warn("Can't parse sequence {}. Got sequence of length 0!", sequenceIndex);
202194
logger.warn("header: {}", header);
203-
}
204-
//logger.info("Sequence index=" + sequenceIndex + " " + fileIndex );
205-
try {
206-
@SuppressWarnings("unchecked")
207-
S sequence = (S)sequenceCreator.getSequence(seq, sequenceIndex);
208-
headerParser.parseHeader(header, sequence);
209-
sequences.put(sequence.getAccession().getID(),sequence);
210-
processedSequences++;
211-
} catch (CompoundNotFoundException e) {
212-
logger.warn("Sequence with header '{}' has unrecognised compounds ({}), it will be ignored",
213-
header, e.getMessage());
195+
header = null;
196+
} else if ( sb.length() > 0 ) {
197+
//logger.info("Sequence index=" + sequenceIndex + " " + fileIndex );
198+
try {
199+
@SuppressWarnings("unchecked")
200+
S sequence = (S)sequenceCreator.getSequence(sb.toString(), sequenceIndex);
201+
headerParser.parseHeader(header, sequence);
202+
sequences.put(sequence.getAccession().getID(),sequence);
203+
processedSequences++;
204+
header = null;
205+
} catch (CompoundNotFoundException e) {
206+
logger.warn("Sequence with header '{}' has unrecognised compounds ({}), it will be ignored",
207+
header, e.getMessage());
208+
}
214209
}
215210
keepGoing = false;
216211
}
217212
if (max > -1 && processedSequences>=max) {
218213
keepGoing=false;
219214
}
220-
if ( this.line == null)
221-
keepGoing = false;
222215
} while (keepGoing);
223216

224217
this.line = line;
225218
this.header= header;
226219

227-
return sequences;
220+
return max > -1 && sequences.isEmpty() ? null : sequences;
228221
}
229222

230223
public void close() throws IOException {
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
package org.biojava.nbio.core.fasta;
2+
3+
import java.io.InputStream;
4+
import java.util.LinkedHashMap;
5+
6+
import static org.junit.Assert.* ;
7+
import static org.hamcrest.CoreMatchers.* ;
8+
9+
import org.biojava.nbio.core.sequence.ProteinSequence;
10+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
11+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
12+
import org.biojava.nbio.core.sequence.io.FastaReader;
13+
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
14+
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
15+
import org.biojava.nbio.core.sequence.io.util.ClasspathResource;
16+
import org.junit.Test;
17+
18+
19+
public class TestFASTAReader {
20+
21+
private void testProcessAll(String path) throws Exception {
22+
ClasspathResource r = new ClasspathResource(path);
23+
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = null ;
24+
try( InputStream inStream = r.getInputStream() ) {
25+
fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
26+
inStream,
27+
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
28+
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
29+
LinkedHashMap<String, ProteinSequence> sequences = fastaReader.process();
30+
assertThat(sequences,is(notNullValue()));
31+
assertThat(sequences.size(),is(1));
32+
assertThat(sequences.containsKey("P02768"),is(true));
33+
assertThat(sequences.get("P02768").getLength(),is(609));
34+
} finally {
35+
if(fastaReader != null) fastaReader.close();
36+
}
37+
}
38+
39+
/**
40+
* Test file contains one sequence (P02768 from swissprot). Read the whole
41+
* file all at once by calling {@link FastaReader#process()} and verify that
42+
* one sequence is read.
43+
*
44+
* @throws Exception
45+
*/
46+
@Test
47+
public void testProcessAll() throws Exception {
48+
testProcessAll("org/biojava/nbio/core/fasta/P02768.fasta");
49+
}
50+
51+
/**
52+
* Same as {@link #testProcessAll()} but the input file contains blank lines.
53+
*
54+
* @throws Exception
55+
*/
56+
@Test
57+
public void testProcessAllWithBlankLines() throws Exception {
58+
testProcessAll("org/biojava/nbio/core/fasta/P02768_blank_lines.fasta");
59+
}
60+
61+
private void testProcess1(String path) throws Exception {
62+
ClasspathResource r = new ClasspathResource(path);
63+
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = null ;
64+
try( InputStream inStream = r.getInputStream() ) {
65+
fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
66+
inStream,
67+
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
68+
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
69+
LinkedHashMap<String,ProteinSequence> out1 = fastaReader.process(1);
70+
assertThat(out1,is(notNullValue()));
71+
assertThat(out1.size(),is(1));
72+
assertThat(out1.containsKey("P02768"),is(true));
73+
assertThat(out1.get("P02768").getLength(),is(609));
74+
LinkedHashMap<String,ProteinSequence> out2 = fastaReader.process(1);
75+
assertThat(out2,is(nullValue()));
76+
} finally {
77+
if(fastaReader != null) fastaReader.close();
78+
}
79+
}
80+
81+
/**
82+
* Test file contains one sequence (P02768 from swissprot). Read one
83+
* sequence at a time by calling {@link FastaReader#process(int)} and verify
84+
* that the first call gets one sequence and the second call gets none.
85+
*
86+
* @throws Exception
87+
*/
88+
@Test
89+
public void testProcess1() throws Exception {
90+
testProcess1("org/biojava/nbio/core/fasta/P02768.fasta");
91+
}
92+
93+
/**
94+
* Same as {@link #testProcess1()}, but input contains blank lines.
95+
*
96+
* @throws Exception
97+
*/
98+
@Test
99+
public void testProcess1WithBlankLines() throws Exception {
100+
testProcess1("org/biojava/nbio/core/fasta/P02768_blank_lines.fasta");
101+
}
102+
103+
private void testProcess2(String path) throws Exception {
104+
ClasspathResource r = new ClasspathResource(path);
105+
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = null ;
106+
try( InputStream inStream = r.getInputStream() ) {
107+
fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
108+
inStream,
109+
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
110+
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
111+
LinkedHashMap<String,ProteinSequence> out1 = fastaReader.process(1);
112+
assertThat(out1,is(notNullValue()));
113+
assertThat(out1.size(),is(1));
114+
assertThat(out1.containsKey("P02768"),is(true));
115+
assertThat(out1.get("P02768").getLength(),is(609));
116+
LinkedHashMap<String,ProteinSequence> out2 = fastaReader.process(1);
117+
assertThat(out2,is(notNullValue()));
118+
assertThat(out2.size(),is(1));
119+
assertThat(out2.containsKey("P00698"),is(true));
120+
assertThat(out2.get("P00698").getLength(),is(147));
121+
LinkedHashMap<String,ProteinSequence> out3 = fastaReader.process(1);
122+
assertThat(out3,is(nullValue()));
123+
} finally {
124+
if(fastaReader != null) fastaReader.close();
125+
}
126+
}
127+
128+
/**
129+
* Test file contains two sequences. Read one sequence at a time by calling
130+
* {@link FastaReader#process(int)} and verify that the first and second
131+
* calls get one sequence each and the third call gets none.
132+
*
133+
* @throws Exception
134+
*/
135+
@Test
136+
public void testProcess2() throws Exception {
137+
testProcess2("org/biojava/nbio/core/fasta/TwoSequences.fasta");
138+
}
139+
140+
/**
141+
* Same as {@link #testProcess2()} but the input file contains blank lines.
142+
* @throws Exception
143+
*/
144+
@Test
145+
public void testProcess2WithBlankLines() throws Exception {
146+
testProcess2("org/biojava/nbio/core/fasta/TwoSequences_blank_lines.fasta");
147+
}
148+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
>sp|P02768|ALBU_HUMAN Serum albumin OS=Homo sapiens GN=ALB PE=1 SV=2
2+
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF
3+
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP
4+
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF
5+
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV
6+
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK
7+
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR
8+
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE
9+
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV
10+
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL
11+
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV
12+
AASQAALGL
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
>sp|P02768|ALBU_HUMAN Serum albumin OS=Homo sapiens GN=ALB PE=1 SV=2
2+
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF
3+
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP
4+
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF
5+
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV
6+
7+
8+
9+
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK
10+
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR
11+
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE
12+
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV
13+
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL
14+
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV
15+
AASQAALGL
16+
17+
18+
19+
20+
21+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
>sp|P02768|ALBU_HUMAN Serum albumin OS=Homo sapiens GN=ALB PE=1 SV=2
2+
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF
3+
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP
4+
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF
5+
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV
6+
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK
7+
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR
8+
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE
9+
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV
10+
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL
11+
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV
12+
AASQAALGL
13+
>sp|P00698|LYSC_CHICK Lysozyme C OS=Gallus gallus GN=LYZ PE=1 SV=1
14+
MRSLLILVLCFLPLAALGKVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQA
15+
TNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDG
16+
NGMNAWVAWRNRCKGTDVQAWIRGCRL
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
>sp|P02768|ALBU_HUMAN Serum albumin OS=Homo sapiens GN=ALB PE=1 SV=2
2+
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF
3+
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP
4+
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF
5+
6+
7+
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV
8+
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK
9+
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR
10+
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE
11+
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV
12+
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL
13+
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV
14+
AASQAALGL
15+
16+
17+
>sp|P00698|LYSC_CHICK Lysozyme C OS=Gallus gallus GN=LYZ PE=1 SV=1
18+
MRSLLILVLCFLPLAALGKVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQA
19+
20+
TNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDG
21+
NGMNAWVAWRNRCKGTDVQAWIRGCRL

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/PDBFileParserTest.java

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
import java.io.IOException;
3333
import java.io.InputStream;
3434
import java.io.StringReader;
35-
3635
import org.biojava.nbio.structure.Atom;
3736
import org.biojava.nbio.structure.Chain;
3837
import org.biojava.nbio.structure.ChainImpl;
@@ -50,6 +49,12 @@
5049
import org.junit.Before;
5150
import org.junit.Test;
5251

52+
/**
53+
* Test the {@link PDBFileParser}.
54+
*
55+
* @author Aleix Lafita
56+
*
57+
*/
5358
public class PDBFileParserTest {
5459

5560
private static PDBFileParser parser;
@@ -555,8 +560,6 @@ public void testCorrectAtomNamePadding() throws IOException {
555560

556561
/**
557562
* Test handling of missing Element column. Issue 537 in github.
558-
* @author Aleix Lafita
559-
* @throws IOException
560563
*/
561564
@Test
562565
public void testMissingElements() throws IOException {
@@ -627,4 +630,28 @@ public void testMissingElements() throws IOException {
627630
assertTrue("the Element column has not been filled correctly", pdb.equals(original));
628631

629632
}
633+
634+
/**
635+
* Test the parsing of release and last modified dates.
636+
*/
637+
@Test
638+
public void testDates() throws IOException {
639+
640+
String revisionDates =
641+
"REVDAT 5 13-JUL-11 1STP 1 VERSN "+newline+
642+
"REVDAT 4 24-FEB-09 1STP 1 VERSN " + newline+
643+
"REVDAT 3 01-APR-03 1STP 1 JRNL " + newline+
644+
"REVDAT 2 15-OCT-94 1STP 1 AUTHOR " + newline+
645+
"REVDAT 1 15-OCT-92 1STP 0 " + newline;
646+
647+
BufferedReader br = new BufferedReader(new StringReader(revisionDates));
648+
Structure s = parser.parsePDBFile(br);
649+
650+
// The latest modified date should be 2011
651+
assertEquals(s.getPDBHeader().getModDate().getYear() + 1900, 2011);
652+
653+
// The release date should be 1992
654+
assertEquals(s.getPDBHeader().getRelDate().getYear() + 1900, 1992);
655+
656+
}
630657
}

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/io/StructureIOTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
*/
2121
package org.biojava.nbio.structure.test.io;
2222

23-
2423
import static org.junit.Assert.*;
2524

2625
import java.io.IOException;
@@ -29,15 +28,16 @@
2928
import org.biojava.nbio.structure.StructureIO;
3029
import org.junit.Test;
3130

32-
33-
31+
/**
32+
* Test StructureIO methods.
33+
*
34+
*/
3435
public class StructureIOTest {
3536

3637
@Test
3738
public void testStructureIO() throws IOException, StructureException {
3839

3940
String pdbId = "1gav";
40-
4141
int nrAssembls = StructureIO.getBiologicalAssemblies(pdbId).size();
4242
assertEquals(1,nrAssembls);
4343

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ public class TestQuatSymmetryDetectorExamples {
5555
@Test
5656
public void testNMR() throws IOException, StructureException {
5757

58-
Structure pdb = StructureIO.getStructure("BIO:1b4c:1");
58+
// as of mmCIF v5 there are no bioassemblies for NMR entries, so now we use the AU (bioassembly 0) - JD 2017-08-02
59+
Structure pdb = StructureIO.getStructure("BIO:1b4c:0");
5960

6061
SubunitClustererParameters clusterParams = new SubunitClustererParameters();
6162
QuatSymmetryParameters symmParams = new QuatSymmetryParameters();

0 commit comments

Comments
 (0)