From 805a9023b78a4f5033bccb80a43c5529d389c8e0 Mon Sep 17 00:00:00 2001 From: Jose Duarte Date: Fri, 1 Mar 2024 22:19:05 -0800 Subject: [PATCH 1/3] Be more lenient with some missing atom_site fields --- .../io/cif/CifStructureConsumerImpl.java | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java index fb3214be96..316cf2d8fe 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java @@ -217,7 +217,10 @@ public void consumeAtomSite(AtomSite atomSite) { isHetAtmInFile = true; } - String insCodeString = pdbxPDBInsCode.get(atomIndex); + String insCodeString = null; + if (pdbxPDBInsCode.isDefined()) { + insCodeString = pdbxPDBInsCode.get(atomIndex); + } Character insCode = null; if (insCodeString != null && !insCodeString.isEmpty() && !"?".equals(insCodeString)) { insCode = insCodeString.charAt(0); @@ -246,7 +249,13 @@ public void consumeAtomSite(AtomSite atomSite) { } String asymId = labelAsymId.get(atomIndex); - String authId = authAsymId.get(atomIndex); + String authId; + if (authAsymId.isDefined()) { + authId = authAsymId.get(atomIndex); + } else { + authId = String.valueOf(asymId); + } + if (currentChain == null) { currentChain = new ChainImpl(); currentChain.setName(authId); @@ -277,7 +286,13 @@ public void consumeAtomSite(AtomSite atomSite) { } } - ResidueNumber residueNumber = new ResidueNumber(authId, authSeqId.get(atomIndex), insCode); + int authSeqIdInt; + if (authSeqId.isDefined()) { + authSeqIdInt = authSeqId.get(atomIndex); + } else { + authSeqIdInt = (int)seqId; + } + ResidueNumber residueNumber = new ResidueNumber(authId, authSeqIdInt, insCode); String recordName = groupPDB.get(atomIndex); String compId = labelCompId.get(atomIndex); @@ -289,7 +304,10 @@ public void consumeAtomSite(AtomSite atomSite) { } Group altGroup = null; - String altLocation = labelAltId.get(atomIndex); + String altLocation = null; + if (labelAltId.isDefined()) { + altLocation = labelAltId.get(atomIndex); + } if (startOfNewChain) { currentGroup = createGroup(recordName, oneLetterCode, compId, seqId); From ac14da470e97b53b504583d5ea245445a807713e Mon Sep 17 00:00:00 2001 From: Jose Duarte Date: Fri, 1 Mar 2024 22:28:24 -0800 Subject: [PATCH 2/3] Better with ternary if --- .../io/cif/CifStructureConsumerImpl.java | 26 +++++-------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java index 316cf2d8fe..9393e285c3 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java @@ -217,10 +217,8 @@ public void consumeAtomSite(AtomSite atomSite) { isHetAtmInFile = true; } - String insCodeString = null; - if (pdbxPDBInsCode.isDefined()) { - insCodeString = pdbxPDBInsCode.get(atomIndex); - } + String insCodeString = pdbxPDBInsCode.isDefined()? pdbxPDBInsCode.get(atomIndex) : null; + Character insCode = null; if (insCodeString != null && !insCodeString.isEmpty() && !"?".equals(insCodeString)) { insCode = insCodeString.charAt(0); @@ -249,12 +247,7 @@ public void consumeAtomSite(AtomSite atomSite) { } String asymId = labelAsymId.get(atomIndex); - String authId; - if (authAsymId.isDefined()) { - authId = authAsymId.get(atomIndex); - } else { - authId = String.valueOf(asymId); - } + String authId = authAsymId.isDefined()? authAsymId.get(atomIndex) : asymId; if (currentChain == null) { currentChain = new ChainImpl(); @@ -286,12 +279,8 @@ public void consumeAtomSite(AtomSite atomSite) { } } - int authSeqIdInt; - if (authSeqId.isDefined()) { - authSeqIdInt = authSeqId.get(atomIndex); - } else { - authSeqIdInt = (int)seqId; - } + int authSeqIdInt = authSeqId.isDefined()? authSeqId.get(atomIndex) : (int)seqId; + ResidueNumber residueNumber = new ResidueNumber(authId, authSeqIdInt, insCode); String recordName = groupPDB.get(atomIndex); @@ -304,10 +293,7 @@ public void consumeAtomSite(AtomSite atomSite) { } Group altGroup = null; - String altLocation = null; - if (labelAltId.isDefined()) { - altLocation = labelAltId.get(atomIndex); - } + String altLocation = labelAltId.isDefined()? labelAltId.get(atomIndex) : null; if (startOfNewChain) { currentGroup = createGroup(recordName, oneLetterCode, compId, seqId); From 3e26abe7d526b828fc23694f25a59c91ba2f763d Mon Sep 17 00:00:00 2001 From: josemduarte Date: Mon, 4 Mar 2024 10:26:56 -0800 Subject: [PATCH 3/3] Adding test case --- .../io/cif/CifFileConsumerImplTest.java | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/io/cif/CifFileConsumerImplTest.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/io/cif/CifFileConsumerImplTest.java index 0d2ddaf4f6..a8925afa88 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/io/cif/CifFileConsumerImplTest.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/io/cif/CifFileConsumerImplTest.java @@ -181,7 +181,7 @@ private static int[] countEntityTypes(List entities) { /** * This tests for cases where dots appear in integer fields. Unusual but it happens in some PDB entries like 1s32. - * See issue https://github.com/biojava/biojava/issues/368 + * See issue ... */ @Test public void specialCases() throws IOException { @@ -220,4 +220,51 @@ public void specialCases() throws IOException { column.valueKinds().forEach(vk -> assertEquals(ValueKind.NOT_PRESENT, vk)); column.stringData().forEach(sd -> assertTrue(sd.isEmpty())); } + + /** + * Testing files with atom_site that doesn't have author fields. E.g. cif files from Meta's ESM Atlas (...) + */ + @Test + public void testAtomSiteWithMissingAuthFields() throws IOException { + // taken from MGYP000911143359.cif + String mmcifStr = + "data_\n" + + "loop_\n" + + "_atom_site.group_PDB\n" + + "_atom_site.id\n" + + "_atom_site.type_symbol\n" + + "_atom_site.label_atom_id\n" + + "_atom_site.label_comp_id\n" + + "_atom_site.label_asym_id\n" + + "_atom_site.label_entity_id\n" + + "_atom_site.label_seq_id\n" + + "_atom_site.Cartn_x\n" + + "_atom_site.Cartn_y\n" + + "_atom_site.Cartn_z\n" + + "_atom_site.occupancy\n" + + "_atom_site.B_iso_or_equiv\n" + + "_atom_site.pdbx_PDB_model_num\n" + + "\n" + + "ATOM 1 N N MET A 1 1 -26.091 68.903 7.841 1.00 90.0 1\n" + + "ATOM 2 C CA MET A 1 1 -26.275 67.677 7.069 1.00 91.0 1\n" + + "ATOM 3 C C MET A 1 1 -24.933 67.025 6.755 1.00 90.0 1\n" + + "ATOM 4 C CB MET A 1 1 -27.033 67.967 5.773 1.00 89.0 1\n" + + "ATOM 5 O O MET A 1 1 -24.314 67.331 5.734 1.00 90.0 1\n" + + "ATOM 6 C CG MET A 1 1 -28.544 67.973 5.934 1.00 86.0 1\n" + + "ATOM 7 S SD MET A 1 1 -29.390 68.904 4.598 1.00 86.0 1\n" + + "ATOM 8 C CE MET A 1 1 -29.202 67.734 3.224 1.00 83.0 1\n" + + "ATOM 9 N N ASN A 1 2 -24.267 66.233 7.730 1.00 90.0 1\n" + + "ATOM 10 C CA ASN A 1 2 -22.897 65.827 8.029 1.00 91.0 1\n" + + "ATOM 11 C C ASN A 1 2 -22.600 64.427 7.500 1.00 90.0 1\n" + + "ATOM 12 C CB ASN A 1 2 -22.634 65.893 9.535 1.00 88.0 1\n" + + "ATOM 13 O O ASN A 1 2 -23.092 63.436 8.044 1.00 89.0 1\n" + + "ATOM 14 C CG ASN A 1 2 -22.191 67.269 9.990 1.00 86.0 1\n" + + "ATOM 15 N ND2 ASN A 1 2 -22.255 67.511 11.294 1.00 87.0 1\n" + + "ATOM 16 O OD1 ASN A 1 2 -21.795 68.108 9.177 1.00 87.0 1\n" ; + MmCifFile cifFile = CifIO.readFromInputStream(new ByteArrayInputStream(mmcifStr.getBytes())).as(StandardSchemata.MMCIF); + Structure s = CifStructureConverter.fromCifFile(cifFile); + assertNotNull(s); + assertEquals(2, s.getPolyChain("A").getAtomGroups().size()); + assertEquals(2, s.getPolyChainByPDB("A").getAtomGroups().size()); + } } \ No newline at end of file