Skip to content

Commit 40ddf40

Browse files
committed
insert license header
1 parent 9673da4 commit 40ddf40

File tree

6 files changed

+367
-135
lines changed

6 files changed

+367
-135
lines changed
Lines changed: 45 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,28 @@
1+
/*
2+
* BioJava development code
3+
*
4+
* This code may be freely distributed and modified under the
5+
* terms of the GNU Lesser General Public Licence. This should
6+
* be distributed with the code. If you do not have a copy,
7+
* see:
8+
*
9+
* http://www.gnu.org/copyleft/lesser.html
10+
*
11+
* Copyright for this code is held jointly by the individual
12+
* authors. These should be listed in @author doc comments.
13+
*
14+
* For more information on the BioJava project and its aims,
15+
* or to join the biojava-l mailing list, visit the home page
16+
* at:
17+
*
18+
* http://www.biojava.org/
19+
*
20+
*/
121
package org.biojava.nbio.core.sequence.io.embl;
222

23+
import jdk.nashorn.internal.ir.annotations.Immutable;
24+
325
/**
4-
*
526
* This class contains the processed data of the ID line of an EMBL file:
627
* Primary accession number
728
* Sequence version number
@@ -10,99 +31,83 @@
1031
* Data class
1132
* Taxonomic division
1233
* Sequence length
13-
* @since 5.0.0
34+
*
1435
* @author Noor Aldeen Al Mbaidin
36+
* @since 5.0.0
1537
*/
38+
@Immutable
1639
public class EmblId {

    private final String primaryAccession;
    private final String sequenceVersion;
    private final String topology;
    private final String moleculeType;
    private final String dataClass;
    private final String taxonomicDivision;
    private final String sequenceLength;

    /**
     * Creates an instance holding the given values. The arguments are stored
     * exactly as supplied; no validation or normalisation is performed.
     *
     * @param primaryAccession  the primary accession number
     * @param sequenceVersion   the sequence version number
     * @param topology          the topology
     * @param moleculeType      the molecule type
     * @param dataClass         the data class
     * @param taxonomicDivision the taxonomic division
     * @param sequenceLength    the sequence length
     */
    public EmblId(String primaryAccession, String sequenceVersion, String topology,
                  String moleculeType, String dataClass, String taxonomicDivision,
                  String sequenceLength) {
        this.primaryAccession = primaryAccession;
        this.sequenceVersion = sequenceVersion;
        this.topology = topology;
        this.moleculeType = moleculeType;
        this.dataClass = dataClass;
        this.taxonomicDivision = taxonomicDivision;
        this.sequenceLength = sequenceLength;
    }

    /**
     * Returns the primary accession number.
     *
     * @return String
     */
    public String getPrimaryAccession() {
        return primaryAccession;
    }

    /**
     * Returns the sequence version.
     *
     * @return String
     */
    public String getSequenceVersion() {
        return sequenceVersion;
    }

    /**
     * Returns the topology.
     *
     * @return String
     */
    public String getTopology() {
        return topology;
    }

    /**
     * Returns the molecule type, i.e. the type of molecule as stored.
     *
     * @return String
     */
    public String getMoleculeType() {
        return moleculeType;
    }

    /**
     * Returns the data class.
     *
     * @return String
     */
    public String getDataClass() {
        return dataClass;
    }

    /**
     * Returns the taxonomic division.
     *
     * @return String
     */
    public String getTaxonomicDivision() {
        return taxonomicDivision;
    }

    /**
     * Returns the sequence length. The last item on the ID line is the length
     * of the sequence (the total number of bases in the sequence). This number
     * includes base positions reported as present but undetermined (coded as "N").
     *
     * @return String
     */
    public String getSequenceLength() {
        return sequenceLength;
    }

}

biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/embl/EmblReader.java

Lines changed: 84 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,39 @@
1+
/*
2+
* BioJava development code
3+
*
4+
* This code may be freely distributed and modified under the
5+
* terms of the GNU Lesser General Public Licence. This should
6+
* be distributed with the code. If you do not have a copy,
7+
* see:
8+
*
9+
* http://www.gnu.org/copyleft/lesser.html
10+
*
11+
* Copyright for this code is held jointly by the individual
12+
* authors. These should be listed in @author doc comments.
13+
*
14+
* For more information on the BioJava project and its aims,
15+
* or to join the biojava-l mailing list, visit the home page
16+
* at:
17+
*
18+
* http://www.biojava.org/
19+
*
20+
*/
121
package org.biojava.nbio.core.sequence.io.embl;
222

323

424
import java.io.*;
25+
import java.util.Arrays;
526
import java.util.LinkedList;
6-
import java.util.List;
27+
728

829
/**
930
* This class processes the data of an EMBL file
10-
* @since 5.0.0
31+
*
1132
* @author Noor Aldeen Al Mbaidin
33+
* @since 5.0.0
1234
*/
1335
public class EmblReader {
1436

15-
private StringBuilder sequence = new StringBuilder("");
16-
17-
public EmblReader() {
18-
19-
}
20-
2137
/**
2238
* The parsing is done in this method.<br>
2339
* This method tries to process all the Embl records
@@ -31,14 +47,15 @@ public static EmblRecord process(File file) throws IOException {
3147

3248
EmblRecord emblRecord = new EmblRecord();
3349
StringBuilder sequence = new StringBuilder("");
50+
LinkedList<EmblReference> emblReferences = new LinkedList<>();
3451
EmblReference emblReference = new EmblReference();
3552
LinkedList<String> accessionNumber = new LinkedList<>();
3653
LinkedList<String> keyword = new LinkedList<>();
3754

3855
if (file == null)
3956
throw new NullPointerException("file can't be null");
4057

41-
if(file.isDirectory())
58+
if (file.isDirectory())
4259
throw new IllegalArgumentException("the file can't be a directory");
4360

4461
try (FileReader fileReader = new FileReader(file)) {
@@ -47,54 +64,56 @@ public static EmblRecord process(File file) throws IOException {
4764
String lineInfo;
4865
try (BufferedReader bufferedReader = new BufferedReader(fileReader)) {
4966
while ((line = bufferedReader.readLine()) != null) {
50-
lineInfo = line.substring(0, 2);
51-
lineIdentifier = line.substring(0, 2);
52-
if (lineIdentifier.equals("ID"))
53-
emblRecord.setEmblId(populateID(line));
54-
else if (lineIdentifier.equals("AC"))
55-
populateAccessionNumber(line, accessionNumber);
56-
else if (lineIdentifier.equals("DT") && line.contains("Created"))
57-
emblRecord.setCreatedDate(lineInfo);
58-
else if (lineIdentifier.equals("DT") && line.contains("updated"))
59-
emblRecord.setLastUpdatedDate(lineInfo);
60-
else if (lineIdentifier.equals("DE"))
61-
emblRecord.setSequenceDescription(lineInfo);
62-
else if (lineIdentifier.equals("KW"))
63-
keyword.add(lineInfo);
64-
else if (lineIdentifier.equals("OS"))
65-
emblRecord.setOrganismSpecies(lineInfo);
66-
else if (lineIdentifier.equals("OC"))
67-
emblRecord.setOrganismClassification(lineInfo);
68-
else if (lineIdentifier.equals("OG"))
69-
emblRecord.setOrGanelle(lineInfo);
70-
else if (lineIdentifier.equals("RN") || lineIdentifier.equals("RP")
71-
|| lineIdentifier.equals("RX") || lineIdentifier.equals("RG")
72-
|| lineIdentifier.equals("RA") || lineIdentifier.equals("RT")
73-
|| lineIdentifier.equals("RL"))
74-
populateEmblReference(lineIdentifier, lineInfo, emblReference);
75-
else if (lineIdentifier.equals("DR"))
76-
emblRecord.setDatabaseCrossReference(lineInfo);
77-
else if (lineIdentifier.equals("AH"))
78-
emblRecord.setAssemblyHeader(lineInfo);
79-
else if (lineIdentifier.equals("AS"))
80-
emblRecord.setAssemblyInformation(lineInfo);
81-
else if (lineIdentifier.equals("CO"))
82-
emblRecord.setConstructedSequence(lineInfo);
83-
else if (lineIdentifier.equals("FH"))
84-
emblRecord.setFeatureHeader(lineInfo);
85-
else if (lineIdentifier.equals("FT"))
86-
emblRecord.setFeatureTable(lineInfo);
87-
else if (lineIdentifier.equals("SQ"))
88-
emblRecord.setSequenceHeader(lineInfo);
89-
else if (lineIdentifier.equals(" ") && !lineIdentifier.equals("//"))
90-
populateSequence(line, sequence);
91-
else if (lineIdentifier.equals("//")) {
92-
emblRecord.setKeyword(keyword);
93-
emblRecord.setEmblReference(emblReference);
94-
emblRecord.setAccessionNumber(accessionNumber);
95-
emblRecord.setSequence(sequence.toString());
96-
}
67+
if (line.length() > 1) {
68+
lineInfo = line.substring(2, line.length()).trim();
69+
lineIdentifier = line.substring(0, 2);
70+
if (lineIdentifier.equals("ID"))
71+
emblRecord.setEmblId(populateID(lineInfo));
72+
else if (lineIdentifier.equals("AC"))
73+
populateAccessionNumber(line, accessionNumber);
74+
else if (lineIdentifier.equals("DT") && line.contains("Created"))
75+
emblRecord.setCreatedDate(lineInfo);
76+
else if (lineIdentifier.equals("DT") && line.contains("updated"))
77+
emblRecord.setLastUpdatedDate(lineInfo);
78+
else if (lineIdentifier.equals("DE"))
79+
emblRecord.setSequenceDescription(lineInfo);
80+
else if (lineIdentifier.equals("KW"))
81+
keyword.add(lineInfo);
82+
else if (lineIdentifier.equals("OS"))
83+
emblRecord.setOrganismSpecies(lineInfo);
84+
else if (lineIdentifier.equals("OC"))
85+
emblRecord.setOrganismClassification(lineInfo);
86+
else if (lineIdentifier.equals("OG"))
87+
emblRecord.setOrGanelle(lineInfo);
88+
else if (lineIdentifier.equals("RN") || lineIdentifier.equals("RP")
89+
|| lineIdentifier.equals("RX") || lineIdentifier.equals("RG")
90+
|| lineIdentifier.equals("RA") || lineIdentifier.equals("RT")
91+
|| lineIdentifier.equals("RL"))
92+
populateEmblReferences(lineIdentifier, lineInfo, emblReference, emblReferences);
93+
else if (lineIdentifier.equals("DR"))
94+
emblRecord.setDatabaseCrossReference(lineInfo);
95+
else if (lineIdentifier.equals("AH"))
96+
emblRecord.setAssemblyHeader(lineInfo);
97+
else if (lineIdentifier.equals("AS"))
98+
emblRecord.setAssemblyInformation(lineInfo);
99+
else if (lineIdentifier.equals("CO"))
100+
emblRecord.setConstructedSequence(lineInfo);
101+
else if (lineIdentifier.equals("FH"))
102+
emblRecord.setFeatureHeader(lineInfo);
103+
else if (lineIdentifier.equals("FT"))
104+
emblRecord.setFeatureTable(lineInfo);
105+
else if (lineIdentifier.equals("SQ"))
106+
emblRecord.setSequenceHeader(lineInfo);
107+
else if (lineIdentifier.equals(" ") && !lineIdentifier.equals("//"))
108+
populateSequence(line, sequence);
109+
else if (lineIdentifier.equals("//")) {
110+
emblRecord.setKeyword(keyword);
111+
emblRecord.setEmblReference(emblReferences);
112+
emblRecord.setAccessionNumber(accessionNumber);
113+
emblRecord.setSequence(sequence.toString());
114+
}
97115

116+
}
98117
}
99118
}
100119
}
@@ -108,7 +127,8 @@ private static void populateSequence(String line, StringBuilder sequence) {
108127
sequence.append(sequenceLine);
109128
}
110129

111-
private static void populateEmblReference(String lineIdentifier, String lineInfo, EmblReference emblReference) {
130+
private static void populateEmblReferences(String lineIdentifier, String lineInfo, EmblReference emblReference
131+
, LinkedList<EmblReference> emblReferences) {
112132
if (lineIdentifier.equals("RN"))
113133
emblReference.setReferenceNumber(lineInfo);
114134
else if (lineIdentifier.equals("RP"))
@@ -121,25 +141,21 @@ else if (lineIdentifier.equals("RA"))
121141
emblReference.setReferenceAuthor(lineInfo);
122142
else if (lineIdentifier.equals("RT"))
123143
emblReference.setReferenceTitle(lineInfo);
124-
else if (lineIdentifier.equals("RL"))
144+
else if (lineIdentifier.equals("RL")) {
125145
emblReference.setReferenceLocation(lineInfo);
146+
emblReferences.add(emblReference.copyEmblReference(emblReference));
147+
}
126148
}
127149

128150
private static void populateAccessionNumber(String line, LinkedList<String> accessionNumber) {
129151
accessionNumber.add(line);
130152
}
131153

132154
private static EmblId populateID(String line) {
133-
EmblId emblId = new EmblId();
134-
line.replace(",", "");
135-
String[] strings = line.split(" ");
136-
emblId.setPrimaryAccession(strings[1]);
137-
emblId.setSequenceVersion(strings[2]);
138-
emblId.setTopology(strings[3]);
139-
emblId.setMoleculeType(strings[4]);
140-
emblId.setDataClass(strings[5]);
141-
emblId.setTaxonomicDivision(strings[6]);
142-
emblId.setSequenceLength(strings[7]);
155+
String[] strings = line.split(";");
156+
Arrays.stream(strings).map(String::trim).toArray(unused -> strings);
157+
EmblId emblId = new EmblId(strings[0], strings[1], strings[2]
158+
, strings[3], strings[4], strings[5], strings[6]);
143159
return emblId;
144160
}
145161

0 commit comments

Comments
 (0)