From 93c1c8f9d4c1fa3d12d2c7f7d250532ed6b62098 Mon Sep 17 00:00:00 2001 From: josemduarte Date: Fri, 21 Feb 2025 11:01:55 -0800 Subject: [PATCH 1/3] Handle more missing fields in struct_ref_seq --- .../io/cif/CifStructureConsumerImpl.java | 55 ++++++++++--------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java index b204687762..45fd8fc417 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java @@ -975,7 +975,7 @@ public void consumeStructRefSeq(StructRefSeq structRefSeq) { dbRef.setIdCode(structRefSeq.getPdbxPDBIdCode().isDefined()? structRefSeq.getPdbxPDBIdCode().get(rowIndex):null); dbRef.setDbAccession(structRefSeq.getPdbxDbAccession().isDefined()? structRefSeq.getPdbxDbAccession().get(rowIndex):null); dbRef.setDbIdCode(structRefSeq.getPdbxDbAccession().isDefined()? structRefSeq.getPdbxDbAccession().get(rowIndex):null); - dbRef.setChainName(structRefSeq.getPdbxStrandId().get(rowIndex)); + dbRef.setChainName(structRefSeq.getPdbxStrandId().isDefined()? structRefSeq.getPdbxStrandId().get(rowIndex):null); OptionalInt structRefRowIndex = IntStream.range(0, structRef.getRowCount()) .filter(i -> structRef.getId().get(i).equals(refId)) @@ -990,34 +990,39 @@ public void consumeStructRefSeq(StructRefSeq structRefSeq) { int seqBegin; int seqEnd; + char beginInsCode = ' '; + char endInsCode = ' '; - try { - seqBegin = Integer.parseInt(structRefSeq.getPdbxAuthSeqAlignBeg().get(rowIndex)); - seqEnd = Integer.parseInt(structRefSeq.getPdbxAuthSeqAlignEnd().get(rowIndex)); - } catch (NumberFormatException e) { - // this happens in a few entries, annotation error? e.g. 6eoj - logger.warn("Couldn't parse pdbx_auth_seq_align_beg/end in _struct_ref_seq. Will not store dbref " + - "alignment info for accession {}. Error: {}", dbRef.getDbAccession(), e.getMessage()); - return; - } + if (structRefSeq.getPdbxAuthSeqAlignBeg().isDefined() && structRefSeq.getPdbxAuthSeqAlignEnd().isDefined()) { + try { + seqBegin = Integer.parseInt(structRefSeq.getPdbxAuthSeqAlignBeg().get(rowIndex)); + seqEnd = Integer.parseInt(structRefSeq.getPdbxAuthSeqAlignEnd().get(rowIndex)); + } catch (NumberFormatException e) { + // this happens in a few entries, annotation error? e.g. 6eoj + logger.warn("Couldn't parse pdbx_auth_seq_align_beg/end in _struct_ref_seq. Will not store dbref " + + "alignment info for accession {}. Error: {}", dbRef.getDbAccession(), e.getMessage()); + return; + } - char beginInsCode = ' '; - String pdbxSeqAlignBegInsCode = structRefSeq.getPdbxSeqAlignBegInsCode().get(rowIndex); - if (pdbxSeqAlignBegInsCode.length() > 0) { - beginInsCode = pdbxSeqAlignBegInsCode.charAt(0); - } + String pdbxSeqAlignBegInsCode = structRefSeq.getPdbxSeqAlignBegInsCode().get(rowIndex); + if (pdbxSeqAlignBegInsCode.length() > 0) { + beginInsCode = pdbxSeqAlignBegInsCode.charAt(0); + } - char endInsCode = ' '; - String pdbxSeqAlignEndInsCode = structRefSeq.getPdbxSeqAlignEndInsCode().get(rowIndex); - if (pdbxSeqAlignEndInsCode.length() > 0) { - endInsCode = pdbxSeqAlignEndInsCode.charAt(0); - } + String pdbxSeqAlignEndInsCode = structRefSeq.getPdbxSeqAlignEndInsCode().get(rowIndex); + if (pdbxSeqAlignEndInsCode.length() > 0) { + endInsCode = pdbxSeqAlignEndInsCode.charAt(0); + } - if (beginInsCode == '?') { - beginInsCode = ' '; - } - if (endInsCode == '?') { - endInsCode = ' '; + if (beginInsCode == '?') { + beginInsCode = ' '; + } + if (endInsCode == '?') { + endInsCode = ' '; + } + } else { + seqBegin = structRefSeq.getSeqAlignBeg().get(rowIndex); + seqEnd = structRefSeq.getSeqAlignEnd().get(rowIndex); } dbRef.setSeqBegin(seqBegin); From 65d3dcde9a49b3b3f36750815c2c2b3e0d501d5a Mon Sep 17 00:00:00 2001 From: josemduarte Date: Fri, 21 Feb 2025 11:24:49 -0800 Subject: [PATCH 2/3] Lenient to missing atom_site.occupancy --- .../biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java index 45fd8fc417..1dccf8ae30 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java @@ -333,7 +333,7 @@ public void consumeAtomSite(AtomSite atomSite) { atom.setY(cartnY.get(atomIndex)); atom.setZ(cartnZ.get(atomIndex)); - atom.setOccupancy((float) occupancy.get(atomIndex)); + atom.setOccupancy((float) (occupancy.isDefined()? occupancy.get(atomIndex) : 1.0)); atom.setTempFactor((float) bIsoOrEquiv.get(atomIndex)); if (altLocation == null || altLocation.isEmpty() || ".".equals(altLocation)) { From 12dcb2ce3c1a03d67798311ca7cf6fd26e12563f Mon Sep 17 00:00:00 2001 From: josemduarte Date: Fri, 21 Feb 2025 15:29:44 -0800 Subject: [PATCH 3/3] Another safeguard, that applies to some PDB-IHM entries --- .../nbio/structure/io/cif/CifStructureConsumerImpl.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java index 1dccf8ae30..c610c05f2b 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/cif/CifStructureConsumerImpl.java @@ -1819,6 +1819,10 @@ private void initMaps() { String[] chainNames = entityPoly.getPdbxStrandId().get(rowIndex).split(","); List asymIds = entityId2asymId.get(entityPoly.getEntityId().get(rowIndex)); + if (asymIds == null) { + logger.warn("No asym ids found for entity {} in _struct_asym. Can't provide a mapping from asym ids to author chain ids for this entity", entityPoly.getEntityId().get(rowIndex)); + break; + } if (chainNames.length != asymIds.size()) { logger.warn("The list of asym ids (from _struct_asym) and the list of author ids (from _entity_poly) " + "for entity {} have different lengths! Can't provide a mapping from asym ids to author chain " +