diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/Atom.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/Atom.java index ebd67c0a8c..7b1c722bd9 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/Atom.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/Atom.java @@ -163,7 +163,8 @@ public interface Atom extends Cloneable, PDBRecord { /** * Get alternate Location. - * @return a Character object representing the alt loc value + * @return a Character object representing the alt loc value. Default altLoc ('.' in mmCIF files) + * is represented by ' ' (space character, ascii 32). * @see #setAltLoc */ public Character getAltLoc(); diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java index 10e16b4313..36ccf49efb 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java @@ -157,10 +157,6 @@ public void setZ(double z) { @Override public double getZ() { return coords.z; } - /** - * Set alternate Location. - * @see #getAltLoc - */ @Override public void setAltLoc(Character c) { // after changing altLoc from Character to char, we do this to keep the interface the same as it used to be - JD 2016-01-27 @@ -170,11 +166,6 @@ public void setAltLoc(Character c) { altLoc = c ; } - /** - * Get alternate Location. 
- * @return a Character object representing the alt loc value - * @see #setAltLoc - */ @Override public Character getAltLoc() { // after changing altLoc from Character to char, we do this to keep the interface the same as it used to be - JD 2016-01-27 @@ -309,7 +300,7 @@ public void setBonds(List bonds) { @Override public void addBond(Bond bond) { if (bonds==null) { - bonds = new ArrayList(BONDS_INITIAL_CAPACITY); + bonds = new ArrayList<>(BONDS_INITIAL_CAPACITY); } bonds.add(bond); } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/Group.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/Group.java index 6a169dbee5..f2bc5a506a 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/Group.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/Group.java @@ -103,12 +103,13 @@ public interface Group extends Serializable { /** * Set the atoms of this group. - * @see {@link Atom} + * @see Atom * @param atoms a list of atoms */ public void setAtoms(List atoms); - /** Remove all atoms from this group. + /** + * Remove all atoms from this group. * */ public void clearAtoms(); @@ -118,13 +119,14 @@ public interface Group extends Serializable { * Beware that some PDB atom names are ambiguous (e.g. CA, which means C-alpha or Calcium), * ambiguities should not occur within the same group though. To solve these ambiguities * one would need to check the atom returned for the required element with {@link Atom#getElement()} + *

+ * Note this method will return only the atom in the default alternative location (be it '.' or a letter). * * @param name a trimmed String representing the atom's PDB name, e.g. "CA" * @return an Atom object or null if no such atom exists within this group */ public Atom getAtom(String name) ; - - + /** * Get at atom by position. * diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java index f89812487a..037dcb2b0b 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java @@ -77,7 +77,7 @@ public class HetatomImpl implements Group { * Behaviors for how to balance memory vs. performance. * @author Andreas Prlic */ - public static enum PerformanceBehavior { + public enum PerformanceBehavior { /** use a built-in HashMap for faster access to memory, at the price of more memory consumption */ BETTER_PERFORMANCE_MORE_MEMORY, @@ -87,7 +87,7 @@ public static enum PerformanceBehavior { } - public static PerformanceBehavior performanceBehavior=PerformanceBehavior.LESS_MEMORY_SLOWER_PERFORMANCE; + private static PerformanceBehavior performanceBehavior=PerformanceBehavior.LESS_MEMORY_SLOWER_PERFORMANCE; private Map atomNameLookup; @@ -105,42 +105,28 @@ public HetatomImpl() { pdb_name = null ; residueNumber = null; - atoms = new ArrayList(); - properties = new HashMap(); + atoms = new ArrayList<>(); + properties = new HashMap<>(); parent = null; chemComp = null; altLocs = null; if ( performanceBehavior == PerformanceBehavior.BETTER_PERFORMANCE_MORE_MEMORY) - atomNameLookup = new HashMap(); + atomNameLookup = new HashMap<>(); else atomNameLookup = null; } - - /** - * returns true or false, depending if this group has 3D coordinates or not. 
- * @return true if Group has 3D coordinates - */ @Override public boolean has3D() { return pdb_flag; } - /** flag if group has 3D data. - * - * @param flag true to set flag that this Group has 3D coordinates - */ @Override public void setPDBFlag(boolean flag){ pdb_flag = flag ; } - /** Set three character name of Group . - * - * @param s a String specifying the PDBName value - * @see #getPDBName - */ @Override public void setPDBName(String s) { // hetatoms can have pdb_name length < 3. e.g. CU (see 1a4a position 1200 ) @@ -152,12 +138,6 @@ public void setPDBName(String s) { } - /** - * Returns the PDBName. - * - * @return a String representing the PDBName value - * @see #setPDBName - */ @Override public String getPDBName() { return pdb_name;} @@ -187,12 +167,8 @@ public void addAtom(Atom atom){ logger.warn("An atom with name " + atom.getName() + " " + altLocStr + " is already present in group: " + this.toString() + ". The atom with serial " + atom.getPDBserial() + " will be ignored in look-ups."); } } - }; - + } - /** remove all atoms - * - */ @Override public void clearAtoms() { atoms.clear(); @@ -245,8 +221,7 @@ public Atom getAtom(String name) { if ( atomNameLookup != null) return atomNameLookup.get(name); else { - /** This is the performance penalty we pay for NOT using the atomnameLookup in PerformanceBehaviour.LESS_MEMORY_SLOWER_PERFORMANCE - */ + // This is the performance penalty we pay for NOT using the atomnameLookup in PerformanceBehaviour.LESS_MEMORY_SLOWER_PERFORMANCE for (Atom a : atoms) { if (a.getName().equals(name)) { return a; @@ -279,16 +254,13 @@ public boolean hasAtom(String fullName) { Atom a = atomNameLookup.get(fullName.trim()); return a != null; } else { - /** This is the performance penalty we pay for NOT using the atomnameLookup in PerformanceBehaviour.LESS_MEMORY_SLOWER_PERFORMANCE - */ + // This is the performance penalty we pay for NOT using the atomnameLookup in PerformanceBehaviour.LESS_MEMORY_SLOWER_PERFORMANCE for (Atom a : atoms) 
{ if (a.getName().equals(fullName)) { return true; } } return false; - - } } @@ -400,42 +372,21 @@ public void setProperties(Map props) { properties = props ; } - /** return properties. - * - * @return a HashMap object representing the properties value - * @see #setProperties - */ @Override public Map getProperties() { return properties ; } - /** set a single property . - * - * @see #getProperties - * @see #getProperty - */ @Override public void setProperty(String key, Object value){ properties.put(key,value); } - /** get a single property . - * @param key a String - * @return an Object - * @see #setProperty - * @see #setProperties - */ @Override public Object getProperty(String key){ return properties.get(key); } - - /** return an AtomIterator. - * - * @return an Iterator object - */ @Override public Iterator iterator() { return new AtomIterator(this); @@ -588,7 +539,7 @@ public boolean hasAltLoc() { @Override public List getAltLocs() { if ( altLocs == null) - return new ArrayList(); + return new ArrayList<>(); return altLocs; } @@ -629,7 +580,7 @@ public Group getAltLocGroup(Character altLoc) { @Override public void addAltLoc(Group group) { if ( altLocs == null) { - altLocs = new ArrayList(); + altLocs = new ArrayList<>(); } altLocs.add(group); @@ -640,10 +591,6 @@ public boolean isWater() { return GroupType.WATERNAMES.contains(pdb_name); } - /** attempts to reduce the memory imprint of this group by trimming - * all internal Collection objects to the required size. 
- * - */ @Override public void trimToSize(){ @@ -663,10 +610,10 @@ public void trimToSize(){ } // now let's fit the hashmaps to size - properties = new HashMap(properties); + properties = new HashMap<>(properties); if ( atomNameLookup != null) - atomNameLookup = new HashMap(atomNameLookup); + atomNameLookup = new HashMap<>(atomNameLookup); } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java index 2b184063c5..bb92724aa4 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java @@ -128,25 +128,7 @@ private void formPeptideBonds() { continue; } - Atom carboxylC; - Atom aminoN; - - carboxylC = tail.getC(); - aminoN = head.getN(); - - - if (carboxylC == null || aminoN == null) { - // some structures may be incomplete and not store info - // about all of their atoms - - continue; - } - - - if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) { - new BondImpl(carboxylC, aminoN, 1); - } - + formBondAltlocAware(tail, "C", head, "N", MAX_PEPTIDE_BOND_LENGTH, 1); } } } @@ -171,18 +153,7 @@ private void formNucleotideBonds() { continue; } - Atom phosphorous = head.getP(); - Atom oThreePrime = tail.getO3Prime(); - - if (phosphorous == null || oThreePrime == null) { - continue; - } - - - if (Calc.getDistance(phosphorous, oThreePrime) < MAX_NUCLEOTIDE_BOND_LENGTH) { - new BondImpl(phosphorous, oThreePrime, 1); - } - + formBondAltlocAware(head, "P", tail, "O3'", MAX_NUCLEOTIDE_BOND_LENGTH, 1); } } } @@ -200,10 +171,7 @@ private void formIntraResidueBonds() { // Now add support for altLocGroup List totList = new ArrayList(); totList.add(mainGroup); - for(Group altLoc: mainGroup.getAltLocs()){ - totList.add(altLoc); - } - + totList.addAll(mainGroup.getAltLocs()); // Now iterate through this list for(Group group : totList){ @@ -213,18 
+181,9 @@ private void formIntraResidueBonds() { group.getPDBName(), group.getResidueNumber(), aminoChemComp.getAtoms().size(), aminoChemComp.getBonds().size()); for (ChemCompBond chemCompBond : aminoChemComp.getBonds()) { - Atom a = getAtom(chemCompBond.getAtom_id_1(), group); - Atom b = getAtom(chemCompBond.getAtom_id_2(), group); - if ( a != null && b != null){ - int bondOrder = chemCompBond.getNumericalBondOrder(); - logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}", - a.getPDBserial(), a.getName(), b.getPDBserial(), b.getName(), bondOrder); - new BondImpl(a, b, bondOrder); - } - else{ - // Some of the atoms were missing. That's fine, there's - // nothing to do in this case. - } + // note we don't check distance to make this call not too expensive + formBondAltlocAware(group, chemCompBond.getAtom_id_1(), + group, chemCompBond.getAtom_id_2(), -1, chemCompBond.getNumericalBondOrder()); } } } @@ -233,19 +192,80 @@ private void formIntraResidueBonds() { } } - private Atom getAtom(String atomId, Group group) { - Atom a = group.getAtom(atomId); - // Check for deuteration - if(a==null && atomId.startsWith("H")) { - a = group.getAtom(atomId.replaceFirst("H", "D")); - // Check it is actually deuterated - if(a!=null){ - if(!a.getElement().equals(Element.D)){ + /** + * Form bond between atoms of the given names and groups, respecting alt loc rules to form bonds: + * no bonds between differently named alt locs (that are not the default alt loc '.') + * and multiple bonds for default alt loc to named alt loc. + * @param g1 first group + * @param name1 name of atom in first group + * @param g2 second group + * @param name2 name of atom in second group + * @param maxAllowedLength max length, if atoms distance above this length no bond will be added. If negative no check on distance is performed. 
+ * @param bondOrder the bond order to be set in the created bond(s) + */ + private void formBondAltlocAware(Group g1, String name1, Group g2, String name2, double maxAllowedLength, int bondOrder) { + List a1s = getAtoms(g1, name1); + List a2s = getAtoms(g2, name2); + + if (a1s.isEmpty() || a2s.isEmpty()) { + // some structures may be incomplete and not store info + // about all of their atoms + return; + } + + for (Atom a1:a1s) { + for (Atom a2:a2s) { + if (a1.getAltLoc() != null && a2.getAltLoc()!=null && + a1.getAltLoc()!=' ' && a2.getAltLoc()!=' ' && + a1.getAltLoc() != a2.getAltLoc()) { + logger.debug("Skipping bond between atoms with differently named alt locs {} (altLoc '{}') -- {} (altLoc '{}')", + a1.toString(), a1.getAltLoc(), a2.toString(), a2.getAltLoc()); + continue; + } + if (maxAllowedLength<0) { + // negative maxAllowedLength means we don't check distance and always add bond + logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}", + a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder); + new BondImpl(a1, a2, bondOrder); + } else { + if (Calc.getDistance(a1, a2) < maxAllowedLength) { + logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}. 
Distance is below {}", + a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder, maxAllowedLength); + new BondImpl(a1, a2, bondOrder); + } else { + logger.debug("Not forming bond between atoms {}-{} and {}-{} with bond order {}, because distance is above {}", + a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder, maxAllowedLength); + } + } + } + } + } + + /** + * Get all atoms (including possible alt locs) in given group that are named with the given atom name + * @param g the group + * @param name the atom name + * @return list of all atoms, or empty list if no atoms with the name + */ + private List getAtoms(Group g, String name) { + List atoms = new ArrayList<>(); + List groupsWithAltLocs = new ArrayList<>(); + groupsWithAltLocs.add(g); + groupsWithAltLocs.addAll(g.getAltLocs()); + for (Group group : groupsWithAltLocs) { + Atom a = group.getAtom(name); + // Check for deuteration + if (a==null && name.startsWith("H")) { + a = group.getAtom(name.replaceFirst("H", "D")); + // Check it is actually deuterated + if (a!=null && !a.getElement().equals(Element.D)){ a=null; } } + if (a!=null) + atoms.add(a); } - return a; + return atoms; } private void trimBondLists() { diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java index d16a3b50dc..b0cdecdc14 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java @@ -213,7 +213,7 @@ private Entity getEntity(int entity_id){ } } } catch (NumberFormatException e) { - logger.warn("Entity id does not look like a number:", e.getMessage()); + logger.warn("Entity id does not look like a number: {}", e.getMessage()); } return null; } @@ -729,7 +729,7 @@ public void documentEnd() { // we'll only add 
seqres chains that are polymeric or unknown if (type==null || type==EntityType.POLYMER ) { - seqResChains.add(seqres); + seqResChains.add(seqres); } logger.debug(" seqres: " + asym.getId() + " " + seqres + "<") ; diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java index 7224a29247..d1c6779e7c 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java @@ -37,7 +37,9 @@ import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import static org.junit.Assert.*; @@ -754,4 +756,269 @@ public void testMmcifConversionAllAltlocs() throws IOException { } + /** + * Test that intra-residue bonds between alt locs link atoms with same altloc codes + * https://github.com/rcsb/mmtf/issues/44 + */ + @Test + public void testIntraResidueBondsBetweenAltlocs() throws IOException { + // from 5MOO + String mmcifData = + "data_test\n" + + "loop_\n" + + "_atom_site.group_PDB \n" + + "_atom_site.id \n" + + "_atom_site.type_symbol \n" + + "_atom_site.label_atom_id \n" + + "_atom_site.label_alt_id \n" + + "_atom_site.label_comp_id \n" + + "_atom_site.label_asym_id \n" + + "_atom_site.label_entity_id \n" + + "_atom_site.label_seq_id \n" + + "_atom_site.pdbx_PDB_ins_code \n" + + "_atom_site.Cartn_x \n" + + "_atom_site.Cartn_y \n" + + "_atom_site.Cartn_z \n" + + "_atom_site.occupancy \n" + + "_atom_site.B_iso_or_equiv \n" + + "_atom_site.pdbx_formal_charge \n" + + "_atom_site.auth_seq_id \n" + + "_atom_site.auth_comp_id \n" + + "_atom_site.auth_asym_id \n" + + "_atom_site.auth_atom_id \n" + + "_atom_site.pdbx_PDB_model_num \n" + + "ATOM 1405 N N A MET A 1 86 ? 10.748 -17.610 -6.975 0.47 16.12 ? 104 MET A N 1 \n" + + "ATOM 1406 N N B MET A 1 86 ? 
10.802 -17.694 -6.986 0.53 17.92 ? 104 MET A N 1 \n" + + "ATOM 1407 C CA A MET A 1 86 ? 11.189 -17.392 -5.610 0.47 15.78 ? 104 MET A CA 1 \n" + + "ATOM 1408 C CA B MET A 1 86 ? 11.033 -17.368 -5.587 0.53 18.29 ? 104 MET A CA 1 \n" + + "ATOM 1409 C C A MET A 1 86 ? 10.952 -18.663 -4.810 0.47 15.91 ? 104 MET A C 1 \n" + + "ATOM 1410 C C B MET A 1 86 ? 10.882 -18.643 -4.767 0.53 17.40 ? 104 MET A C 1 \n" + + "ATOM 1411 O O A MET A 1 86 ? 10.120 -19.504 -5.154 0.47 18.21 ? 104 MET A O 1 \n" + + "ATOM 1412 O O B MET A 1 86 ? 10.018 -19.474 -5.052 0.53 20.02 ? 104 MET A O 1 \n" + + "ATOM 1413 C CB A MET A 1 86 ? 10.477 -16.204 -4.933 0.47 17.14 ? 104 MET A CB 1 \n" + + "ATOM 1414 C CB B MET A 1 86 ? 10.001 -16.336 -5.111 0.53 18.92 ? 104 MET A CB 1 \n" + + "ATOM 1415 C CG A MET A 1 86 ? 9.019 -16.476 -4.619 0.47 20.01 ? 104 MET A CG 1 \n" + + "ATOM 1416 C CG B MET A 1 86 ? 10.030 -16.038 -3.634 0.53 19.12 ? 104 MET A CG 1 \n" + + "ATOM 1417 S SD A MET A 1 86 ? 8.207 -15.088 -3.838 0.47 22.06 ? 104 MET A SD 1 \n" + + "ATOM 1418 S SD B MET A 1 86 ? 8.874 -14.724 -3.205 0.53 20.16 ? 104 MET A SD 1 \n" + + "ATOM 1419 C CE A MET A 1 86 ? 9.151 -14.973 -2.340 0.47 25.15 ? 104 MET A CE 1 \n" + + "ATOM 1420 C CE B MET A 1 86 ? 7.269 -15.536 -3.380 0.53 20.38 ? 104 MET A CE 1 \n" + + "ATOM 1421 H H A MET A 1 86 ? 9.931 -18.207 -7.055 0.47 15.58 ? 104 MET A H 1 \n" + + "ATOM 1422 H H B MET A 1 86 ? 10.144 -18.461 -7.109 0.53 18.91 ? 104 MET A H 1 \n" + + "ATOM 1423 H HA A MET A 1 86 ? 12.256 -17.182 -5.644 0.47 15.14 ? 104 MET A HA 1 \n" + + "ATOM 1424 H HA B MET A 1 86 ? 12.033 -16.953 -5.465 0.53 19.55 ? 104 MET A HA 1 \n" + + "ATOM 1425 H HB2 A MET A 1 86 ? 10.986 -15.920 -4.008 0.47 17.68 ? 104 MET A HB2 1 \n" + + "ATOM 1426 H HB3 A MET A 1 86 ? 10.484 -15.364 -5.622 0.47 17.68 ? 104 MET A HB3 1 \n" + + "ATOM 1427 H HB3 B MET A 1 86 ? 9.001 -16.676 -5.398 0.53 20.49 ? 104 MET A HB3 1 \n" + + "ATOM 1428 H HG2 A MET A 1 86 ? 8.490 -16.704 -5.546 0.47 20.93 ? 
104 MET A HG2 1 \n" + + "ATOM 1429 H HG3 A MET A 1 86 ? 8.956 -17.315 -3.927 0.47 20.93 ? 104 MET A HG3 1 \n" + + "ATOM 1430 H HE2 A MET A 1 86 ? 9.861 -14.153 -2.440 0.47 27.31 ? 104 MET A HE2 1 \n" + + "ATOM 1431 H HE2 B MET A 1 86 ? 7.346 -16.554 -2.998 0.53 23.03 ? 104 MET A HE2 1 \n" + + "ATOM 1432 H HE3 B MET A 1 86 ? 6.996 -15.566 -4.437 0.53 23.03 ? 104 MET A HE3 1 "; + + SimpleMMcifParser parser = new SimpleMMcifParser(); + SimpleMMcifConsumer consumer = new SimpleMMcifConsumer(); + parser.addMMcifConsumer(consumer); + + FileParsingParameters params = new FileParsingParameters(); + params.setCreateAtomBonds(true); + consumer.setFileParsingParameters(params); + + BufferedReader buf = new BufferedReader(new StringReader(mmcifData)); + parser.parse(buf); + buf.close(); + + Structure s = consumer.getStructure(); + Chain c = s.getPolyChains().get(0); + assertEquals(1, c.getAtomGroups().size()); + + Group g = c.getAtomGroup(0); + + assertEquals(1, g.getAltLocs().size()); + + boolean foundCEHE3bond = false; + for (Atom a : g.getAtoms()) { + for (Bond b : a.getBonds()) { +// if (b.getAtomA().getAltLoc() != b.getAtomB().getAltLoc()) { +// System.out.println( +// b.getAtomA().toString() + ": '" + b.getAtomA().getAltLoc() + "' --- " + +// b.getAtomB().toString() + ": '" + b.getAtomB().getAltLoc() + "'"); +// } + // no bonds between atoms with different alt locs + assertEquals(b.getAtomA().toString() + " --- " + b.getAtomB().toString(), + b.getAtomA().getAltLoc(), b.getAtomB().getAltLoc()); + + // a bond should exist between CE and HE3 but only for altloc=B + if ((b.getAtomA().getName().equals("CE") && b.getAtomB().getName().equals("HE3")) || + (b.getAtomA().getName().equals("HE3") && b.getAtomB().getName().equals("CE")) ) { + foundCEHE3bond = true; + } + } + } + + // there should be a bond between CE and HE3 but only for altloc=B + assertTrue(foundCEHE3bond); + + } + + /** + * Test that inter-residue bonds between alt locs link atoms with same altloc codes or default 
alt loc to all alt locs + * https://github.com/rcsb/mmtf/issues/44 + */ + @Test + public void testInterResidueBondsBetweenAltlocs() throws IOException { + // from 5MOO + String mmcifData = + "data_test\n" + + "# \n" + + "loop_\n" + + "_entity.id \n" + + "_entity.type \n" + + "_entity.src_method \n" + + "_entity.pdbx_description \n" + + "_entity.formula_weight \n" + + "_entity.pdbx_number_of_molecules \n" + + "_entity.pdbx_ec \n" + + "_entity.pdbx_mutation \n" + + "_entity.pdbx_fragment \n" + + "_entity.details \n" + + "1 polymer nat 'Cationic trypsin' 23324.287 1 3.4.21.4 ? ? ? \n" + + "# \n" + + "loop_\n" + + "_entity_poly_seq.entity_id \n" + + "_entity_poly_seq.num \n" + + "_entity_poly_seq.mon_id \n" + + "_entity_poly_seq.hetero \n" + + "1 1 ILE n \n" + + "1 2 MET n \n" + + "# \n" + + "loop_\n" + + "_struct_asym.id \n" + + "_struct_asym.pdbx_blank_PDB_chainid_flag \n" + + "_struct_asym.pdbx_modified \n" + + "_struct_asym.entity_id \n" + + "_struct_asym.details \n" + + "A N N 1 ? \n" + + "# \n" + + "loop_\n" + + "_atom_site.group_PDB \n" + + "_atom_site.id \n" + + "_atom_site.type_symbol \n" + + "_atom_site.label_atom_id \n" + + "_atom_site.label_alt_id \n" + + "_atom_site.label_comp_id \n" + + "_atom_site.label_asym_id \n" + + "_atom_site.label_entity_id \n" + + "_atom_site.label_seq_id \n" + + "_atom_site.pdbx_PDB_ins_code \n" + + "_atom_site.Cartn_x \n" + + "_atom_site.Cartn_y \n" + + "_atom_site.Cartn_z \n" + + "_atom_site.occupancy \n" + + "_atom_site.B_iso_or_equiv \n" + + "_atom_site.pdbx_formal_charge \n" + + "_atom_site.auth_seq_id \n" + + "_atom_site.auth_comp_id \n" + + "_atom_site.auth_asym_id \n" + + "_atom_site.auth_atom_id \n" + + "_atom_site.pdbx_PDB_model_num \n" + + "ATOM 1385 N N . ILE A 1 1 ? 10.900 -16.328 -10.274 1.00 17.47 ? 103 ILE A N 1 \n" + + "ATOM 1386 C CA . ILE A 1 1 ? 10.885 -17.487 -9.388 1.00 17.76 ? 103 ILE A CA 1 \n" + + "ATOM 1387 C C . ILE A 1 1 ? 11.374 -17.058 -8.011 1.00 17.35 ? 103 ILE A C 1 \n" + + "ATOM 1388 O O . 
ILE A 1 1 ? 12.265 -16.211 -7.883 1.00 18.51 ? 103 ILE A O 1 \n" + + "ATOM 1389 C CB . ILE A 1 1 ? 11.721 -18.644 -9.986 1.00 18.19 ? 103 ILE A CB 1 \n" + + "ATOM 1390 C CG1 . ILE A 1 1 ? 11.610 -19.916 -9.144 1.00 19.64 ? 103 ILE A CG1 1 \n" + + "ATOM 1391 C CG2 . ILE A 1 1 ? 13.177 -18.246 -10.209 1.00 19.73 ? 103 ILE A CG2 1 \n" + + "ATOM 1392 C CD1 . ILE A 1 1 ? 12.217 -21.162 -9.820 1.00 22.94 ? 103 ILE A CD1 1 \n" + + "ATOM 1393 H H A ILE A 1 1 ? 11.598 -15.614 -10.041 1.00 17.71 ? 103 ILE A H 1 \n" + + "ATOM 1394 D D B ILE A 1 1 ? 11.598 -15.614 -10.041 0.00 17.71 ? 103 ILE A D 1 \n" + + "ATOM 1395 H HA . ILE A 1 1 ? 9.856 -17.843 -9.277 1.00 17.70 ? 103 ILE A HA 1 \n" + + "ATOM 1396 H HB . ILE A 1 1 ? 11.300 -18.886 -10.957 1.00 18.93 ? 103 ILE A HB 1 \n" + + "ATOM 1397 H HG12 . ILE A 1 1 ? 12.149 -19.788 -8.209 1.00 20.93 ? 103 ILE A HG12 1 \n" + + "ATOM 1398 H HG13 . ILE A 1 1 ? 10.563 -20.127 -8.939 1.00 20.93 ? 103 ILE A HG13 1 \n" + + "ATOM 1399 H HG21 . ILE A 1 1 ? 13.669 -19.035 -10.776 1.00 20.97 ? 103 ILE A HG21 1 \n" + + "ATOM 1400 H HG22 . ILE A 1 1 ? 13.235 -17.312 -10.767 1.00 20.97 ? 103 ILE A HG22 1 \n" + + "ATOM 1401 H HG23 . ILE A 1 1 ? 13.683 -18.144 -9.251 1.00 20.97 ? 103 ILE A HG23 1 \n" + + "ATOM 1402 H HD11 . ILE A 1 1 ? 13.299 -21.078 -9.905 1.00 24.96 ? 103 ILE A HD11 1 \n" + + "ATOM 1403 H HD12 . ILE A 1 1 ? 11.967 -22.036 -9.223 1.00 24.96 ? 103 ILE A HD12 1 \n" + + "ATOM 1404 H HD13 . ILE A 1 1 ? 11.779 -21.281 -10.808 1.00 24.96 ? 103 ILE A HD13 1 \n" + + "ATOM 1405 N N A MET A 1 2 ? 10.748 -17.610 -6.975 0.47 16.12 ? 104 MET A N 1 \n" + + "ATOM 1406 N N B MET A 1 2 ? 10.802 -17.694 -6.986 0.53 17.92 ? 104 MET A N 1 \n" + + "ATOM 1407 C CA A MET A 1 2 ? 11.189 -17.392 -5.610 0.47 15.78 ? 104 MET A CA 1 \n" + + "ATOM 1408 C CA B MET A 1 2 ? 11.033 -17.368 -5.587 0.53 18.29 ? 104 MET A CA 1 \n" + + "ATOM 1409 C C A MET A 1 2 ? 10.952 -18.663 -4.810 0.47 15.91 ? 104 MET A C 1 \n" + + "ATOM 1410 C C B MET A 1 2 ? 
10.882 -18.643 -4.767 0.53 17.40 ? 104 MET A C 1 \n" + + "ATOM 1411 O O A MET A 1 2 ? 10.120 -19.504 -5.154 0.47 18.21 ? 104 MET A O 1 \n" + + "ATOM 1412 O O B MET A 1 2 ? 10.018 -19.474 -5.052 0.53 20.02 ? 104 MET A O 1 \n" + + "ATOM 1413 C CB A MET A 1 2 ? 10.477 -16.204 -4.933 0.47 17.14 ? 104 MET A CB 1 \n" + + "ATOM 1414 C CB B MET A 1 2 ? 10.001 -16.336 -5.111 0.53 18.92 ? 104 MET A CB 1 \n" + + "ATOM 1415 C CG A MET A 1 2 ? 9.019 -16.476 -4.619 0.47 20.01 ? 104 MET A CG 1 \n" + + "ATOM 1416 C CG B MET A 1 2 ? 10.030 -16.038 -3.634 0.53 19.12 ? 104 MET A CG 1 \n" + + "ATOM 1417 S SD A MET A 1 2 ? 8.207 -15.088 -3.838 0.47 22.06 ? 104 MET A SD 1 \n" + + "ATOM 1418 S SD B MET A 1 2 ? 8.874 -14.724 -3.205 0.53 20.16 ? 104 MET A SD 1 \n" + + "ATOM 1419 C CE A MET A 1 2 ? 9.151 -14.973 -2.340 0.47 25.15 ? 104 MET A CE 1 \n" + + "ATOM 1420 C CE B MET A 1 2 ? 7.269 -15.536 -3.380 0.53 20.38 ? 104 MET A CE 1 \n" + + "ATOM 1421 H H A MET A 1 2 ? 9.931 -18.207 -7.055 0.47 15.58 ? 104 MET A H 1 \n" + + "ATOM 1422 H H B MET A 1 2 ? 10.144 -18.461 -7.109 0.53 18.91 ? 104 MET A H 1 \n" + + "ATOM 1423 H HA A MET A 1 2 ? 12.256 -17.182 -5.644 0.47 15.14 ? 104 MET A HA 1 \n" + + "ATOM 1424 H HA B MET A 1 2 ? 12.033 -16.953 -5.465 0.53 19.55 ? 104 MET A HA 1 \n" + + "ATOM 1425 H HB2 A MET A 1 2 ? 10.986 -15.920 -4.008 0.47 17.68 ? 104 MET A HB2 1 \n" + + "ATOM 1426 H HB3 A MET A 1 2 ? 10.484 -15.364 -5.622 0.47 17.68 ? 104 MET A HB3 1 \n" + + "ATOM 1427 H HB3 B MET A 1 2 ? 9.001 -16.676 -5.398 0.53 20.49 ? 104 MET A HB3 1 \n" + + "ATOM 1428 H HG2 A MET A 1 2 ? 8.490 -16.704 -5.546 0.47 20.93 ? 104 MET A HG2 1 \n" + + "ATOM 1429 H HG3 A MET A 1 2 ? 8.956 -17.315 -3.927 0.47 20.93 ? 104 MET A HG3 1 \n" + + "ATOM 1430 H HE2 A MET A 1 2 ? 9.861 -14.153 -2.440 0.47 27.31 ? 104 MET A HE2 1 \n" + + "ATOM 1431 H HE2 B MET A 1 2 ? 7.346 -16.554 -2.998 0.53 23.03 ? 104 MET A HE2 1 \n" + + "ATOM 1432 H HE3 B MET A 1 2 ? 6.996 -15.566 -4.437 0.53 23.03 ? 
104 MET A HE3 1 "; + + SimpleMMcifParser parser = new SimpleMMcifParser(); + SimpleMMcifConsumer consumer = new SimpleMMcifConsumer(); + parser.addMMcifConsumer(consumer); + + FileParsingParameters params = new FileParsingParameters(); + params.setCreateAtomBonds(true); + consumer.setFileParsingParameters(params); + + BufferedReader buf = new BufferedReader(new StringReader(mmcifData)); + parser.parse(buf); + buf.close(); + + Structure s = consumer.getStructure(); + Chain c = s.getPolyChains().get(0); + assertEquals(2, c.getAtomGroups().size()); + + // inter residue bonds and alt locs + // ILE-C (.) must be linked to both MET-N (A and B alt locs) + Group g1 = c.getAtomGroup(0); + + Atom catom = g1.getAtom("C"); + List bonds = new ArrayList<>(); + for (Bond b : catom.getBonds()) { + if (b.getAtomA().getName().equals("N") || b.getAtomB().getName().equals("N")) { + bonds.add(b); + } + } + + assertEquals(2, bonds.size()); + + Set seenAltLocs = new HashSet<>(); + for (Bond b : bonds) { + Atom aAtom = b.getAtomA(); + Atom bAtom = b.getAtomB(); + Atom nAtom; + if (aAtom.getName().equals("N")) { + nAtom = aAtom; + } else { + nAtom = bAtom; + } + seenAltLocs.add(nAtom.getAltLoc()); + } + // 2 distinct N atoms: alt loc A and B + assertEquals(2, seenAltLocs.size()); + assertTrue(seenAltLocs.contains('A')); + assertTrue(seenAltLocs.contains('B')); + + } + + }