1+ /*
2+ * BioJava development code
3+ *
4+ * This code may be freely distributed and modified under the
5+ * terms of the GNU Lesser General Public Licence. This should
6+ * be distributed with the code. If you do not have a copy,
7+ * see:
8+ *
9+ * http://www.gnu.org/copyleft/lesser.html
10+ *
11+ * Copyright for this code is held jointly by the individual
12+ * authors. These should be listed in @author doc comments.
13+ *
14+ * For more information on the BioJava project and its aims,
15+ * or to join the biojava-l mailing list, visit the home page
16+ * at:
17+ *
18+ * http://www.biojava.org/
19+ *
20+ */
121package org .biojava .nbio .core .sequence .io .embl ;
222
323
424import java .io .*;
25+ import java .util .Arrays ;
526import java .util .LinkedList ;
6- import java . util . List ;
27+
728
829/**
930 * This class processes the data of an EMBL file.
10- * @since 5.0.0
31+ *
1132 * @author Noor Aldeen Al Mbaidin
33+ * @since 5.0.0
1234 */
1335public class EmblReader {
1436
15- private StringBuilder sequence = new StringBuilder ("" );
16-
17- public EmblReader () {
18-
19- }
20-
2137 /**
2238 * The parsing is done in this method.<br>
2339 * This method tries to process all the Embl records
@@ -31,14 +47,15 @@ public static EmblRecord process(File file) throws IOException {
3147
3248 EmblRecord emblRecord = new EmblRecord ();
3349 StringBuilder sequence = new StringBuilder ("" );
50+ LinkedList <EmblReference > emblReferences = new LinkedList <>();
3451 EmblReference emblReference = new EmblReference ();
3552 LinkedList <String > accessionNumber = new LinkedList <>();
3653 LinkedList <String > keyword = new LinkedList <>();
3754
3855 if (file == null )
3956 throw new NullPointerException ("file can't be null" );
4057
41- if (file .isDirectory ())
58+ if (file .isDirectory ())
4259 throw new IllegalArgumentException ("the file can't be a directory" );
4360
4461 try (FileReader fileReader = new FileReader (file )) {
@@ -47,54 +64,56 @@ public static EmblRecord process(File file) throws IOException {
4764 String lineInfo ;
4865 try (BufferedReader bufferedReader = new BufferedReader (fileReader )) {
4966 while ((line = bufferedReader .readLine ()) != null ) {
50- lineInfo = line .substring (0 , 2 );
51- lineIdentifier = line .substring (0 , 2 );
52- if (lineIdentifier .equals ("ID" ))
53- emblRecord .setEmblId (populateID (line ));
54- else if (lineIdentifier .equals ("AC" ))
55- populateAccessionNumber (line , accessionNumber );
56- else if (lineIdentifier .equals ("DT" ) && line .contains ("Created" ))
57- emblRecord .setCreatedDate (lineInfo );
58- else if (lineIdentifier .equals ("DT" ) && line .contains ("updated" ))
59- emblRecord .setLastUpdatedDate (lineInfo );
60- else if (lineIdentifier .equals ("DE" ))
61- emblRecord .setSequenceDescription (lineInfo );
62- else if (lineIdentifier .equals ("KW" ))
63- keyword .add (lineInfo );
64- else if (lineIdentifier .equals ("OS" ))
65- emblRecord .setOrganismSpecies (lineInfo );
66- else if (lineIdentifier .equals ("OC" ))
67- emblRecord .setOrganismClassification (lineInfo );
68- else if (lineIdentifier .equals ("OG" ))
69- emblRecord .setOrGanelle (lineInfo );
70- else if (lineIdentifier .equals ("RN" ) || lineIdentifier .equals ("RP" )
71- || lineIdentifier .equals ("RX" ) || lineIdentifier .equals ("RG" )
72- || lineIdentifier .equals ("RA" ) || lineIdentifier .equals ("RT" )
73- || lineIdentifier .equals ("RL" ))
74- populateEmblReference (lineIdentifier , lineInfo , emblReference );
75- else if (lineIdentifier .equals ("DR" ))
76- emblRecord .setDatabaseCrossReference (lineInfo );
77- else if (lineIdentifier .equals ("AH" ))
78- emblRecord .setAssemblyHeader (lineInfo );
79- else if (lineIdentifier .equals ("AS" ))
80- emblRecord .setAssemblyInformation (lineInfo );
81- else if (lineIdentifier .equals ("CO" ))
82- emblRecord .setConstructedSequence (lineInfo );
83- else if (lineIdentifier .equals ("FH" ))
84- emblRecord .setFeatureHeader (lineInfo );
85- else if (lineIdentifier .equals ("FT" ))
86- emblRecord .setFeatureTable (lineInfo );
87- else if (lineIdentifier .equals ("SQ" ))
88- emblRecord .setSequenceHeader (lineInfo );
89- else if (lineIdentifier .equals (" " ) && !lineIdentifier .equals ("//" ))
90- populateSequence (line , sequence );
91- else if (lineIdentifier .equals ("//" )) {
92- emblRecord .setKeyword (keyword );
93- emblRecord .setEmblReference (emblReference );
94- emblRecord .setAccessionNumber (accessionNumber );
95- emblRecord .setSequence (sequence .toString ());
96- }
67+ if (line .length () > 1 ) {
68+ lineInfo = line .substring (2 , line .length ()).trim ();
69+ lineIdentifier = line .substring (0 , 2 );
70+ if (lineIdentifier .equals ("ID" ))
71+ emblRecord .setEmblId (populateID (lineInfo ));
72+ else if (lineIdentifier .equals ("AC" ))
73+ populateAccessionNumber (line , accessionNumber );
74+ else if (lineIdentifier .equals ("DT" ) && line .contains ("Created" ))
75+ emblRecord .setCreatedDate (lineInfo );
76+ else if (lineIdentifier .equals ("DT" ) && line .contains ("updated" ))
77+ emblRecord .setLastUpdatedDate (lineInfo );
78+ else if (lineIdentifier .equals ("DE" ))
79+ emblRecord .setSequenceDescription (lineInfo );
80+ else if (lineIdentifier .equals ("KW" ))
81+ keyword .add (lineInfo );
82+ else if (lineIdentifier .equals ("OS" ))
83+ emblRecord .setOrganismSpecies (lineInfo );
84+ else if (lineIdentifier .equals ("OC" ))
85+ emblRecord .setOrganismClassification (lineInfo );
86+ else if (lineIdentifier .equals ("OG" ))
87+ emblRecord .setOrGanelle (lineInfo );
88+ else if (lineIdentifier .equals ("RN" ) || lineIdentifier .equals ("RP" )
89+ || lineIdentifier .equals ("RX" ) || lineIdentifier .equals ("RG" )
90+ || lineIdentifier .equals ("RA" ) || lineIdentifier .equals ("RT" )
91+ || lineIdentifier .equals ("RL" ))
92+ populateEmblReferences (lineIdentifier , lineInfo , emblReference , emblReferences );
93+ else if (lineIdentifier .equals ("DR" ))
94+ emblRecord .setDatabaseCrossReference (lineInfo );
95+ else if (lineIdentifier .equals ("AH" ))
96+ emblRecord .setAssemblyHeader (lineInfo );
97+ else if (lineIdentifier .equals ("AS" ))
98+ emblRecord .setAssemblyInformation (lineInfo );
99+ else if (lineIdentifier .equals ("CO" ))
100+ emblRecord .setConstructedSequence (lineInfo );
101+ else if (lineIdentifier .equals ("FH" ))
102+ emblRecord .setFeatureHeader (lineInfo );
103+ else if (lineIdentifier .equals ("FT" ))
104+ emblRecord .setFeatureTable (lineInfo );
105+ else if (lineIdentifier .equals ("SQ" ))
106+ emblRecord .setSequenceHeader (lineInfo );
107+ else if (lineIdentifier .equals (" " ) && !lineIdentifier .equals ("//" ))
108+ populateSequence (line , sequence );
109+ else if (lineIdentifier .equals ("//" )) {
110+ emblRecord .setKeyword (keyword );
111+ emblRecord .setEmblReference (emblReferences );
112+ emblRecord .setAccessionNumber (accessionNumber );
113+ emblRecord .setSequence (sequence .toString ());
114+ }
97115
116+ }
98117 }
99118 }
100119 }
@@ -108,7 +127,8 @@ private static void populateSequence(String line, StringBuilder sequence) {
108127 sequence .append (sequenceLine );
109128 }
110129
111- private static void populateEmblReference (String lineIdentifier , String lineInfo , EmblReference emblReference ) {
130+ private static void populateEmblReferences (String lineIdentifier , String lineInfo , EmblReference emblReference
131+ , LinkedList <EmblReference > emblReferences ) {
112132 if (lineIdentifier .equals ("RN" ))
113133 emblReference .setReferenceNumber (lineInfo );
114134 else if (lineIdentifier .equals ("RP" ))
@@ -121,25 +141,21 @@ else if (lineIdentifier.equals("RA"))
121141 emblReference .setReferenceAuthor (lineInfo );
122142 else if (lineIdentifier .equals ("RT" ))
123143 emblReference .setReferenceTitle (lineInfo );
124- else if (lineIdentifier .equals ("RL" ))
144+ else if (lineIdentifier .equals ("RL" )) {
125145 emblReference .setReferenceLocation (lineInfo );
146+ emblReferences .add (emblReference .copyEmblReference (emblReference ));
147+ }
126148 }
127149
128150 private static void populateAccessionNumber (String line , LinkedList <String > accessionNumber ) {
129151 accessionNumber .add (line );
130152 }
131153
132154 private static EmblId populateID (String line ) {
133- EmblId emblId = new EmblId ();
134- line .replace ("," , "" );
135- String [] strings = line .split (" " );
136- emblId .setPrimaryAccession (strings [1 ]);
137- emblId .setSequenceVersion (strings [2 ]);
138- emblId .setTopology (strings [3 ]);
139- emblId .setMoleculeType (strings [4 ]);
140- emblId .setDataClass (strings [5 ]);
141- emblId .setTaxonomicDivision (strings [6 ]);
142- emblId .setSequenceLength (strings [7 ]);
155+ String [] strings = line .split (";" );
156+ Arrays .stream (strings ).map (String ::trim ).toArray (unused -> strings );
157+ EmblId emblId = new EmblId (strings [0 ], strings [1 ], strings [2 ]
158+ , strings [3 ], strings [4 ], strings [5 ], strings [6 ]);
143159 return emblId ;
144160 }
145161
0 commit comments