Skip to content

Commit a2ac8f5

Browse files
committed
1. Added a new folder to the main/java/org/biojava/nbio/core/sequence package
for the Reference tag. It contains ReferenceInterface, AbstractReference and GenbankReference (for Genbank format files only). 2. Added a List of AbstractReference, with getter and setter, to AbstractSequence.java. 3. Updated GenbankSequenceParser to parse the Reference tag. 4. Implemented the addReference method in GenericGenbankHeaderParser, because a Genbank format file may contain multiple Reference tags. 5. Updated GenbankReaderTest to check the Reference tag parsing process.
1 parent d8fa109 commit a2ac8f5

File tree

7 files changed

+211
-1
lines changed

7 files changed

+211
-1
lines changed

biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankSequenceParser.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import org.biojava.nbio.core.sequence.location.InsdcParser;
4646
import org.biojava.nbio.core.sequence.location.template.AbstractLocation;
4747
import org.biojava.nbio.core.sequence.location.template.Location;
48+
import org.biojava.nbio.core.sequence.reference.GenbankReference;
4849
import org.biojava.nbio.core.sequence.template.AbstractSequence;
4950
import org.biojava.nbio.core.sequence.template.Compound;
5051
import org.biojava.nbio.core.sequence.template.CompoundSet;
@@ -214,6 +215,19 @@ private String parse(BufferedReader bufferedReader) {
214215
} else if (sectionKey.equals(SOURCE_TAG)) {
215216
// ignore - can get all this from the first feature
216217
} else if (sectionKey.equals(REFERENCE_TAG)) {
218+
if (!section.isEmpty()) {
219+
GenbankReference genbankReference = new GenbankReference();
220+
for (String[] ref : section) {
221+
if (ref[0].equals(AUTHORS_TAG)) {
222+
genbankReference.setAuthors(ref[1]);
223+
} else if (ref[0].equals(TITLE_TAG)) {
224+
genbankReference.setTitle(ref[1]);
225+
} else if (ref[0].equals(JOURNAL_TAG)) {
226+
genbankReference.setJournal(ref[1]);
227+
}
228+
}
229+
headerParser.addReference(genbankReference);
230+
}
217231
} else if (sectionKey.equals(COMMENT_TAG)) {
218232
// Set up some comments
219233
headerParser.setComment(section.get(0)[1]);

biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenericGenbankHeaderParser.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,13 @@
2626
import org.biojava.nbio.core.exceptions.ParserException;
2727
import org.biojava.nbio.core.sequence.AccessionID;
2828
import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface;
29+
import org.biojava.nbio.core.sequence.reference.AbstractReference;
2930
import org.biojava.nbio.core.sequence.template.AbstractSequence;
3031
import org.biojava.nbio.core.sequence.template.Compound;
3132

3233
import java.util.ArrayList;
34+
import java.util.List;
35+
3336
import org.biojava.nbio.core.sequence.DataSource;
3437

3538
public class GenericGenbankHeaderParser<S extends AbstractSequence<C>, C extends Compound> implements SequenceHeaderParserInterface<S,C> {
@@ -40,7 +43,8 @@ public class GenericGenbankHeaderParser<S extends AbstractSequence<C>, C extends
4043
@SuppressWarnings("unused")
4144
private int version;
4245
private boolean versionSeen;
43-
private ArrayList<String> comments = new ArrayList<String>();
46+
private ArrayList<String> comments = new ArrayList<>();
47+
private List<AbstractReference> references = new ArrayList<>();
4448

4549
/**
4650
* Parse the header and set the values in the sequence
@@ -53,6 +57,7 @@ public void parseHeader(String header, S sequence) {
5357
sequence.setAccession(new AccessionID(accession, DataSource.GENBANK, version, identifier));
5458
sequence.setDescription(description);
5559
sequence.setComments(comments);
60+
sequence.setReferences(references);
5661
}
5762

5863
/**
@@ -129,4 +134,8 @@ public void setComment(String comment) throws ParserException {
129134
if (comment==null) throw new ParserException("Comment cannot be null");
130135
this.comments.add(comment);
131136
}
137+
138+
public void addReference(AbstractReference abstractReference){
139+
this.references.add(abstractReference);
140+
}
132141
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package org.biojava.nbio.core.sequence.reference;
2+
3+
/**
4+
* @Author Jim Tang
5+
*/
6+
public abstract class AbstractReference implements ReferenceInterface {
7+
8+
private String title;
9+
10+
private String authors;
11+
12+
private String journal;
13+
14+
@Override
15+
public String getTitle() {
16+
return title;
17+
}
18+
19+
@Override
20+
public void setTitle(String title) {
21+
this.title = title;
22+
}
23+
24+
@Override
25+
public String getAuthors() {
26+
return authors;
27+
}
28+
29+
@Override
30+
public void setAuthors(String authors) {
31+
this.authors = authors;
32+
}
33+
34+
@Override
35+
public String getJournal() {
36+
return journal;
37+
}
38+
39+
@Override
40+
public void setJournal(String journal) {
41+
this.journal = journal;
42+
}
43+
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package org.biojava.nbio.core.sequence.reference;
2+
3+
/**
4+
* For Genbank format file only.
5+
*
6+
* @Author Jim Tang
7+
*/
8+
public class GenbankReference extends AbstractReference {
9+
10+
/**
11+
* The authors are a list of Inventors that retrieved from the Reference section.
12+
*/
13+
private String authors;
14+
15+
/**
16+
* The title that retrieved from the Reference section.
17+
*/
18+
private String title;
19+
20+
/**
21+
* The journal usually contains the Publication Number, Publication Date and Assignee
22+
*/
23+
private String journal;
24+
25+
/**
26+
* @return
27+
*/
28+
public String getAuthors() {
29+
return authors;
30+
}
31+
32+
/**
33+
* @param authors
34+
*/
35+
public void setAuthors(String authors) {
36+
this.authors = authors;
37+
}
38+
39+
/**
40+
* @return
41+
*/
42+
public String getTitle() {
43+
return title;
44+
}
45+
46+
/**
47+
* @param title
48+
*/
49+
public void setTitle(String title) {
50+
this.title = title;
51+
}
52+
53+
/**
54+
* @return
55+
*/
56+
public String getJournal() {
57+
return journal;
58+
}
59+
60+
/**
61+
* @param journal
62+
*/
63+
public void setJournal(String journal) {
64+
this.journal = journal;
65+
}
66+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package org.biojava.nbio.core.sequence.reference;
2+
3+
/**
 * Read/write access to the three textual parts of a reference: title, authors
 * and journal, as retrieved from a Reference section.
 *
 * @author Jim Tang
 */
public interface ReferenceInterface {

    /**
     * Gets the title that was retrieved from the Reference section.
     *
     * @return the title
     */
    String getTitle();

    /**
     * Sets the title that was retrieved from the Reference section.
     *
     * @param title the title
     */
    void setTitle(String title);

    /**
     * Gets the authors that were retrieved from the Reference section.
     *
     * @return the authors
     */
    String getAuthors();

    /**
     * Sets the authors that were retrieved from the Reference section.
     *
     * @param authors the authors
     */
    void setAuthors(String authors);

    /**
     * Gets the journal that was retrieved from the Reference section.
     *
     * @return the journal
     */
    String getJournal();

    /**
     * Sets the journal that was retrieved from the Reference section.
     *
     * @param journal the journal
     */
    void setJournal(String journal);
}

biojava-core/src/main/java/org/biojava/nbio/core/sequence/template/AbstractSequence.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.biojava.nbio.core.sequence.location.SequenceLocation;
3737
import org.biojava.nbio.core.sequence.location.SimpleLocation;
3838
import org.biojava.nbio.core.sequence.location.template.Location;
39+
import org.biojava.nbio.core.sequence.reference.AbstractReference;
3940
import org.biojava.nbio.core.sequence.storage.ArrayListSequenceReader;
4041
import org.biojava.nbio.core.util.Equals;
4142
import org.slf4j.Logger;
@@ -74,6 +75,7 @@ public abstract class AbstractSequence<C extends Compound> implements Sequence<C
7475
private LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<C>, C>>> groupedFeatures =
7576
new LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<C>, C>>>();
7677
private List<String> comments = new ArrayList<>();
78+
private List<AbstractReference> references;
7779

7880
public AbstractSequence() {
7981
}
@@ -322,6 +324,22 @@ public void setSequenceScore(Double sequenceScore) {
322324
this.sequenceScore = sequenceScore;
323325
}
324326

327+
/**
328+
*
329+
* @return
330+
*/
331+
public List<AbstractReference> getReferences() {
332+
return references;
333+
}
334+
335+
/**
336+
*
337+
* @param references
338+
*/
339+
public void setReferences(List<AbstractReference> references) {
340+
this.references = references;
341+
}
342+
325343
/**
326344
* Return features at a sequence position by type
327345
* @param featureType

biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,16 @@ public void testProcess() throws Exception {
110110
"publications that are available for this gene. Please see the\n" +
111111
"Entrez Gene record to access additional publications."));
112112

113+
assertThat(proteinSequences.get("NP_000257").getReferences().size(),is(11));
114+
assertThat(proteinSequences.get("NP_000257").getReferences().get(0).getAuthors(),
115+
is("Lev,D., Weigl,Y., Hasan,M., Gak,E., Davidovich,M., Vinkler,C.,\n" +
116+
"Leshinsky-Silver,E., Lerman-Sagie,T. and Watemberg,N."));
117+
assertThat(proteinSequences.get("NP_000257").getReferences().get(1).getTitle(),
118+
is("Novel mutations in Norrie disease gene in Japanese patients with\n" +
119+
"Norrie disease and familial exudative vitreoretinopathy"));
120+
assertThat(proteinSequences.get("NP_000257").getReferences().get(10).getJournal(),
121+
is("Nat. Genet. 1 (3), 199-203 (1992)"));
122+
113123
assertNotNull(proteinSequences);
114124
assertEquals(1, proteinSequences.size());
115125

0 commit comments

Comments
 (0)