Skip to content

Commit 43f32d1

Browse files
committed
Fixed issue: the mmCIF parser was not properly handling some CIF quoting cases
1 parent 9cd1949 commit 43f32d1

File tree

4 files changed

+68
-33
lines changed

4 files changed

+68
-33
lines changed

biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifParser.java

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -308,9 +308,9 @@ private List<String> processSingleLine(String line){
308308
if ( line.startsWith(STRING_LIMIT))
309309
return data;
310310
}
311-
boolean inString = false;
312-
boolean inS1 = false;
313-
boolean inS2 = false;
311+
boolean inString = false; // semicolon (;) quoting
312+
boolean inS1 = false; // single quote (') quoting
313+
boolean inS2 = false; // double quote (") quoting
314314
String word = "";
315315

316316
for (int i=0; i< line.length(); i++ ){
@@ -321,9 +321,9 @@ private List<String> processSingleLine(String line){
321321
if (i < line.length() - 1)
322322
nextC = line.charAt(i+1);
323323

324-
//Character lastC = null;
325-
//if (i>0)
326-
// lastC = line.charAt(i-1);
324+
Character lastC = null;
325+
if (i>0)
326+
lastC = line.charAt(i-1);
327327

328328
if (c == ' ') {
329329

@@ -364,10 +364,12 @@ private List<String> processSingleLine(String line){
364364
word += c;
365365
}
366366

367-
} else {
367+
} else if (lastC==null || lastC==' ') {
368368
// the beginning of a new string
369369
inString = true;
370370
inS1 = true;
371+
} else {
372+
word += c;
371373
}
372374
} else if ( c == S2 ){
373375
if ( inString){
@@ -394,10 +396,12 @@ private List<String> processSingleLine(String line){
394396
} else {
395397
word += c;
396398
}
397-
} else {
399+
} else if (lastC==null || lastC==' ') {
398400
// the beginning of a new string
399401
inString = true;
400402
inS2 = true;
403+
} else {
404+
word += c;
401405
}
402406
} else {
403407
word += c;

biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestDifficultMmCIFFiles.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ public void test4letterChains() throws IOException, StructureException, URISynta
172172
* For instance, Phenix produces mmCIF files with unquoted strings containing single quote characters
173173
* @throws IOException
174174
*/
175-
//@Test
175+
@Test
176176
public void testQuotingCornerCase () throws IOException {
177177
InputStream inStream = this.getClass().getResourceAsStream("/org/biojava/nbio/structure/io/difficult_mmcif_quoting.cif");
178178
MMcifParser parser = new SimpleMMcifParser();

biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestNonDepositedFiles.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifConsumer;
3030
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
3131
import org.biojava.nbio.structure.xtal.CrystalCell;
32-
import org.junit.Ignore;
3332
import org.junit.Test;
3433

3534
import java.io.BufferedReader;
@@ -192,10 +191,10 @@ private void checkChains(Structure s) {
192191
/**
193192
* A test for reading a phenix-produced (ver 1.9_1692) mmCIF file.
194193
* This is the file submitted to the PDB for deposition of entry 4lup
194+
* See github issue #234
195195
* @throws IOException
196196
*/
197-
@Ignore // remove once issue #234 is fixed
198-
@Test
197+
//@Test
199198
public void testPhenixFile() throws IOException {
200199
InputStream inStream = new GZIPInputStream(this.getClass().getResourceAsStream("/org/biojava/nbio/structure/io/4lup_phenix_output.cif.gz"));
201200
MMcifParser parser = new SimpleMMcifParser();
Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
data_4LUP_subset_no_quotes
1+
data_
22
loop_
33
_atom_site.group_PDB
44
_atom_site.id
@@ -14,27 +14,59 @@ _atom_site.Cartn_x
1414
_atom_site.Cartn_y
1515
_atom_site.Cartn_z
1616
_atom_site.occupancy
17-
_atom_site.B_iso_or_equiv
18-
_atom_site.Cartn_x_esd
19-
_atom_site.Cartn_y_esd
20-
_atom_site.Cartn_z_esd
21-
_atom_site.occupancy_esd
22-
_atom_site.B_iso_or_equiv_esd
23-
_atom_site.pdbx_formal_charge
17+
_atom_site.B_iso_or_equiv
2418
_atom_site.auth_seq_id
25-
_atom_site.auth_comp_id
19+
_atom_site.auth_comp_id
2620
_atom_site.auth_asym_id
2721
_atom_site.auth_atom_id
2822
_atom_site.pdbx_PDB_model_num
29-
ATOM 1727 P P . DT C 2 1 ? 7.887 18.595 87.913 0.00 20.67 ? ? ? ? ? ? 107 DT B P 1
30-
ATOM 1728 O OP1 . DT C 2 1 ? 7.732 19.982 88.407 0.00 20.42 ? ? ? ? ? ? 107 DT B OP1 1
31-
ATOM 1729 O OP2 . DT C 2 1 ? 7.210 17.480 88.610 0.00 20.42 ? ? ? ? ? ? 107 DT B OP2 1
32-
ATOM 1730 O O5' . DT C 2 1 ? 7.464 18.547 86.371 0.00 21.57 ? ? ? ? ? ? 107 DT B O5' 1
33-
ATOM 1731 C C5' . DT C 2 1 ? 6.642 19.572 85.828 0.00 23.49 ? ? ? ? ? ? 107 DT B C5' 1
34-
ATOM 1732 C C4' . DT C 2 1 ? 6.943 19.784 84.356 0.00 27.33 ? ? ? ? ? ? 107 DT B C4' 1
35-
ATOM 1733 O O4' . DT C 2 1 ? 8.382 19.831 84.159 0.00 28.38 ? ? ? ? ? ? 107 DT B O4' 1
36-
ATOM 1734 C C3' . DT C 2 1 ? 6.438 18.687 83.425 1.00 31.54 ? ? ? ? ? ? 107 DT B C3' 1
37-
ATOM 1735 O O3' . DT C 2 1 ? 6.115 19.248 82.157 1.00 34.97 ? ? ? ? ? ? 107 DT B O3' 1
38-
ATOM 1736 C C2' . DT C 2 1 ? 7.641 17.758 83.333 1.00 31.47 ? ? ? ? ? ? 107 DT B C2' 1
39-
ATOM 1737 C C1' . DT C 2 1 ? 8.790 18.756 83.333 1.00 31.55 ? ? ? ? ? ? 107 DT B C1' 1
40-
#
23+
ATOM 1728 O OP1 . DT C 2 1 ? 7.732 19.982 88.407 0.00 20.42 107 DT B OP1 1
24+
ATOM 1730 O O5' . DT C 2 1 ? 7.464 18.547 86.371 0.00 21.57 107 DT B O5' 1
25+
ATOM 1738 C H2" . DT C 2 1 ? 8.111 19.111 84.111 0.00 29.00 107 DT B H2" 1
26+
ATOM 1730 O "O3'" . DT C 2 1 ? 7.111 18.111 86.111 0.00 21.00 107 DT B "O3'" 1
27+
#
28+
loop_
29+
_audit_author.name
30+
_audit_author.pdbx_ordinal
31+
'Pederson, D.M.' 1
32+
'Welsh, L.C.' 2
33+
"Marvin, D.A." 3
34+
#
35+
loop_
36+
_pdbx_database_related.db_name
37+
_pdbx_database_related.db_id
38+
_pdbx_database_related.content_type
39+
_pdbx_database_related.details
40+
PDB 1IFP unspecified 'INOVIRUS (FILAMENTOUS BACTERIOPHAGE) STRAIN PF3 MAJOR COATPROTEIN ASSEMBLY'
41+
PDB 1QL1 unspecified 'INOVIRUS (FILAMENTOUS BACTERIOPHAGE) STRAIN PF1 MAJOR COAT PROTEIN ASSEMBLY'
42+
#
43+
loop_
44+
_citation.id
45+
_citation.title
46+
_citation.journal_abbrev
47+
_citation.journal_volume
48+
_citation.page_first
49+
_citation.page_last
50+
_citation.year
51+
_citation.journal_id_ASTM
52+
_citation.country
53+
_citation.journal_id_ISSN
54+
_citation.journal_id_CSD
55+
_citation.book_publisher
56+
_citation.pdbx_database_id_PubMed
57+
_citation.pdbx_database_id_DOI
58+
primary 'The Protein Capsid of Filamentous Bacteriophage Ph75 from Thermus Thermophilus'
59+
J.Mol.Biol. 309 401 ? 2001 JMOBAK UK 0022-2836 0070 ? 11371161 10.1006/JMBI.2001.4685
60+
1 'The Molecular Structure and Structural Transition of the Alpha-Helical Capsid in Filamentous Bacteriophage Pf1'
61+
'Acta Crystallogr.,Sect.D' 56 137 ? 2000 ABCRE6 DK 0907-4449 0766 ? 10666593 10.1107/S0907444999015334
62+
2 'Structure of the Capsid of Pf3 Filamentous Phage Determined from X-Ray Fibre Diffraction Data at 3.1 A Resolution'
63+
J.Mol.Biol. 283 155 ? 1998 JMOBAK UK 0022-2836 0070 ? 9761681 10.1006/JMBI.1998.2081
64+
#
65+
loop_
66+
_citation_author.citation_id
67+
_citation_author.name
68+
_citation_author.ordinal
69+
primary 'Pederson, D.M.' 1
70+
primary "Welsh, L.C." 2
71+
primary 'Marvin, D.A.' 3
72+
#

0 commit comments

Comments
 (0)