From 761799b7e8c727d573fa834a5bb36959a0292b10 Mon Sep 17 00:00:00 2001 From: Jose Duarte Date: Sat, 9 Nov 2019 22:17:56 -0800 Subject: [PATCH 1/8] Fixing issue: no bonds between different altloc atoms --- .../org/biojava/nbio/structure/AtomImpl.java | 2 +- .../biojava/nbio/structure/HetatomImpl.java | 47 +++------- .../biojava/nbio/structure/io/BondMaker.java | 18 ++-- .../biojava/nbio/structure/TestAltLocs.java | 93 +++++++++++++++++++ 4 files changed, 115 insertions(+), 45 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java index 10e16b4313..a270b06983 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java @@ -309,7 +309,7 @@ public void setBonds(List bonds) { @Override public void addBond(Bond bond) { if (bonds==null) { - bonds = new ArrayList(BONDS_INITIAL_CAPACITY); + bonds = new ArrayList<>(BONDS_INITIAL_CAPACITY); } bonds.add(bond); } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java index f89812487a..cc5356b942 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java @@ -77,7 +77,7 @@ public class HetatomImpl implements Group { * Behaviors for how to balance memory vs. performance. * @author Andreas Prlic */ - public static enum PerformanceBehavior { + public enum PerformanceBehavior { /** use a built-in HashMap for faster access to memory, at the price of more memory consumption */ BETTER_PERFORMANCE_MORE_MEMORY, @@ -87,7 +87,7 @@ public static enum PerformanceBehavior { } - public static PerformanceBehavior performanceBehavior=PerformanceBehavior.LESS_MEMORY_SLOWER_PERFORMANCE; + private static PerformanceBehavior performanceBehavior=PerformanceBehavior.LESS_MEMORY_SLOWER_PERFORMANCE; private Map atomNameLookup; @@ -105,42 +105,28 @@ public HetatomImpl() { pdb_name = null ; residueNumber = null; - atoms = new ArrayList(); - properties = new HashMap(); + atoms = new ArrayList<>(); + properties = new HashMap<>(); parent = null; chemComp = null; altLocs = null; if ( performanceBehavior == PerformanceBehavior.BETTER_PERFORMANCE_MORE_MEMORY) - atomNameLookup = new HashMap(); + atomNameLookup = new HashMap<>(); else atomNameLookup = null; } - - /** - * returns true or false, depending if this group has 3D coordinates or not. - * @return true if Group has 3D coordinates - */ @Override public boolean has3D() { return pdb_flag; } - /** flag if group has 3D data. - * - * @param flag true to set flag that this Group has 3D coordinates - */ @Override public void setPDBFlag(boolean flag){ pdb_flag = flag ; } - /** Set three character name of Group . - * - * @param s a String specifying the PDBName value - * @see #getPDBName - */ @Override public void setPDBName(String s) { // hetatoms can have pdb_name length < 3. e.g. CU (see 1a4a position 1200 ) @@ -152,12 +138,6 @@ public void setPDBName(String s) { } - /** - * Returns the PDBName. - * - * @return a String representing the PDBName value - * @see #setPDBName - */ @Override public String getPDBName() { return pdb_name;} @@ -187,12 +167,8 @@ public void addAtom(Atom atom){ logger.warn("An atom with name " + atom.getName() + " " + altLocStr + " is already present in group: " + this.toString() + ". The atom with serial " + atom.getPDBserial() + " will be ignored in look-ups."); } } - }; - + } - /** remove all atoms - * - */ @Override public void clearAtoms() { atoms.clear(); @@ -245,8 +221,7 @@ public Atom getAtom(String name) { if ( atomNameLookup != null) return atomNameLookup.get(name); else { - /** This is the performance penalty we pay for NOT using the atomnameLookup in PerformanceBehaviour.LESS_MEMORY_SLOWER_PERFORMANCE - */ + // This is the performance penalty we pay for NOT using the atomnameLookup in PerformanceBehaviour.LESS_MEMORY_SLOWER_PERFORMANCE for (Atom a : atoms) { if (a.getName().equals(name)) { return a; @@ -588,7 +563,7 @@ public boolean hasAltLoc() { @Override public List getAltLocs() { if ( altLocs == null) - return new ArrayList(); + return new ArrayList<>(); return altLocs; } @@ -629,7 +604,7 @@ public Group getAltLocGroup(Character altLoc) { @Override public void addAltLoc(Group group) { if ( altLocs == null) { - altLocs = new ArrayList(); + altLocs = new ArrayList<>(); } altLocs.add(group); @@ -663,10 +638,10 @@ public void trimToSize(){ } // now let's fit the hashmaps to size - properties = new HashMap(properties); + properties = new HashMap<>(properties); if ( atomNameLookup != null) - atomNameLookup = new HashMap(atomNameLookup); + atomNameLookup = new HashMap<>(atomNameLookup); } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java index 2b184063c5..0715cf4792 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java @@ -200,10 +200,7 @@ private void formIntraResidueBonds() { // Now add support for altLocGroup List totList = new ArrayList(); totList.add(mainGroup); - for(Group altLoc: mainGroup.getAltLocs()){ - totList.add(altLoc); - } - + totList.addAll(mainGroup.getAltLocs()); // Now iterate through this list for(Group group : totList){ @@ -216,15 +213,19 @@ private void formIntraResidueBonds() { Atom a = getAtom(chemCompBond.getAtom_id_1(), group); Atom b = getAtom(chemCompBond.getAtom_id_2(), group); if ( a != null && b != null){ + + // if they are different altlocs there must be no bond + if (a.getAltLoc() != b.getAltLoc()) + continue; + int bondOrder = chemCompBond.getNumericalBondOrder(); logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}", a.getPDBserial(), a.getName(), b.getPDBserial(), b.getName(), bondOrder); new BondImpl(a, b, bondOrder); } - else{ - // Some of the atoms were missing. That's fine, there's - // nothing to do in this case. - } + // Else: Some of the atoms were missing. That's fine, there's + // nothing to do in this case. + } } } @@ -235,6 +236,7 @@ private void formIntraResidueBonds() { private Atom getAtom(String atomId, Group group) { Atom a = group.getAtom(atomId); + // Check for deuteration if(a==null && atomId.startsWith("H")) { a = group.getAtom(atomId.replaceFirst("H", "D")); diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java index 7224a29247..37e4ef62ea 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java @@ -754,4 +754,97 @@ public void testMmcifConversionAllAltlocs() throws IOException { } + /** + * Test that bonds between alt locs link atoms with same altloc codes + * https://github.com/rcsb/mmtf/issues/44 + */ + @Test + public void testBondsBetweenAltlocs() throws IOException { + String mmcifData = + "data_test\n" + + "loop_\n" + + "_atom_site.group_PDB \n" + + "_atom_site.id \n" + + "_atom_site.type_symbol \n" + + "_atom_site.label_atom_id \n" + + "_atom_site.label_alt_id \n" + + "_atom_site.label_comp_id \n" + + "_atom_site.label_asym_id \n" + + "_atom_site.label_entity_id \n" + + "_atom_site.label_seq_id \n" + + "_atom_site.pdbx_PDB_ins_code \n" + + "_atom_site.Cartn_x \n" + + "_atom_site.Cartn_y \n" + + "_atom_site.Cartn_z \n" + + "_atom_site.occupancy \n" + + "_atom_site.B_iso_or_equiv \n" + + "_atom_site.pdbx_formal_charge \n" + + "_atom_site.auth_seq_id \n" + + "_atom_site.auth_comp_id \n" + + "_atom_site.auth_asym_id \n" + + "_atom_site.auth_atom_id \n" + + "_atom_site.pdbx_PDB_model_num \n" + + "ATOM 1405 N N A MET A 1 86 ? 10.748 -17.610 -6.975 0.47 16.12 ? 104 MET A N 1 \n" + + "ATOM 1406 N N B MET A 1 86 ? 10.802 -17.694 -6.986 0.53 17.92 ? 104 MET A N 1 \n" + + "ATOM 1407 C CA A MET A 1 86 ? 11.189 -17.392 -5.610 0.47 15.78 ? 104 MET A CA 1 \n" + + "ATOM 1408 C CA B MET A 1 86 ? 11.033 -17.368 -5.587 0.53 18.29 ? 104 MET A CA 1 \n" + + "ATOM 1409 C C A MET A 1 86 ? 10.952 -18.663 -4.810 0.47 15.91 ? 104 MET A C 1 \n" + + "ATOM 1410 C C B MET A 1 86 ? 10.882 -18.643 -4.767 0.53 17.40 ? 104 MET A C 1 \n" + + "ATOM 1411 O O A MET A 1 86 ? 10.120 -19.504 -5.154 0.47 18.21 ? 104 MET A O 1 \n" + + "ATOM 1412 O O B MET A 1 86 ? 10.018 -19.474 -5.052 0.53 20.02 ? 104 MET A O 1 \n" + + "ATOM 1413 C CB A MET A 1 86 ? 10.477 -16.204 -4.933 0.47 17.14 ? 104 MET A CB 1 \n" + + "ATOM 1414 C CB B MET A 1 86 ? 10.001 -16.336 -5.111 0.53 18.92 ? 104 MET A CB 1 \n" + + "ATOM 1415 C CG A MET A 1 86 ? 9.019 -16.476 -4.619 0.47 20.01 ? 104 MET A CG 1 \n" + + "ATOM 1416 C CG B MET A 1 86 ? 10.030 -16.038 -3.634 0.53 19.12 ? 104 MET A CG 1 \n" + + "ATOM 1417 S SD A MET A 1 86 ? 8.207 -15.088 -3.838 0.47 22.06 ? 104 MET A SD 1 \n" + + "ATOM 1418 S SD B MET A 1 86 ? 8.874 -14.724 -3.205 0.53 20.16 ? 104 MET A SD 1 \n" + + "ATOM 1419 C CE A MET A 1 86 ? 9.151 -14.973 -2.340 0.47 25.15 ? 104 MET A CE 1 \n" + + "ATOM 1420 C CE B MET A 1 86 ? 7.269 -15.536 -3.380 0.53 20.38 ? 104 MET A CE 1 \n" + + "ATOM 1421 H H A MET A 1 86 ? 9.931 -18.207 -7.055 0.47 15.58 ? 104 MET A H 1 \n" + + "ATOM 1422 H H B MET A 1 86 ? 10.144 -18.461 -7.109 0.53 18.91 ? 104 MET A H 1 \n" + + "ATOM 1423 H HA A MET A 1 86 ? 12.256 -17.182 -5.644 0.47 15.14 ? 104 MET A HA 1 \n" + + "ATOM 1424 H HA B MET A 1 86 ? 12.033 -16.953 -5.465 0.53 19.55 ? 104 MET A HA 1 \n" + + "ATOM 1425 H HB2 A MET A 1 86 ? 10.986 -15.920 -4.008 0.47 17.68 ? 104 MET A HB2 1 \n" + + "ATOM 1426 H HB3 A MET A 1 86 ? 10.484 -15.364 -5.622 0.47 17.68 ? 104 MET A HB3 1 \n" + + "ATOM 1427 H HB3 B MET A 1 86 ? 9.001 -16.676 -5.398 0.53 20.49 ? 104 MET A HB3 1 \n" + + "ATOM 1428 H HG2 A MET A 1 86 ? 8.490 -16.704 -5.546 0.47 20.93 ? 104 MET A HG2 1 \n" + + "ATOM 1429 H HG3 A MET A 1 86 ? 8.956 -17.315 -3.927 0.47 20.93 ? 104 MET A HG3 1 \n" + + "ATOM 1430 H HE2 A MET A 1 86 ? 9.861 -14.153 -2.440 0.47 27.31 ? 104 MET A HE2 1 \n" + + "ATOM 1431 H HE2 B MET A 1 86 ? 7.346 -16.554 -2.998 0.53 23.03 ? 104 MET A HE2 1 \n" + + "ATOM 1432 H HE3 B MET A 1 86 ? 6.996 -15.566 -4.437 0.53 23.03 ? 104 MET A HE3 1 "; + + SimpleMMcifParser parser = new SimpleMMcifParser(); + SimpleMMcifConsumer consumer = new SimpleMMcifConsumer(); + parser.addMMcifConsumer(consumer); + + FileParsingParameters params = new FileParsingParameters(); + params.setCreateAtomBonds(true); + consumer.setFileParsingParameters(params); + + BufferedReader buf = new BufferedReader(new StringReader(mmcifData)); + parser.parse(buf); + buf.close(); + + Structure s = consumer.getStructure(); + Chain c = s.getPolyChains().get(0); + assertEquals(1, c.getAtomGroups().size()); + + Group g = c.getAtomGroup(0); + + assertEquals(1, g.getAltLocs().size()); + + for (Atom a : g.getAtoms()) { + for (Bond b : a.getBonds()) { +// if (b.getAtomA().getAltLoc() != b.getAtomB().getAltLoc()) { +// System.out.println( +// b.getAtomA().toString() + ":" + b.getAtomA().getAltLoc() + " --- " + +// b.getAtomB().toString() + ":" + b.getAtomB().getAltLoc()); +// } + assertEquals(b.getAtomA().toString() + " --- " + b.getAtomB().toString(), + b.getAtomA().getAltLoc(), b.getAtomB().getAltLoc()); + } + } + + } + } From 408c8accefb0863507e291b418599ec450667ff5 Mon Sep 17 00:00:00 2001 From: Jose Duarte Date: Sat, 9 Nov 2019 22:43:37 -0800 Subject: [PATCH 2/8] Extending test --- .../java/org/biojava/nbio/structure/TestAltLocs.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java index 37e4ef62ea..28db8cd8f0 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java @@ -833,6 +833,7 @@ public void testBondsBetweenAltlocs() throws IOException { assertEquals(1, g.getAltLocs().size()); + boolean foundCEHE3bond = false; for (Atom a : g.getAtoms()) { for (Bond b : a.getBonds()) { // if (b.getAtomA().getAltLoc() != b.getAtomB().getAltLoc()) { @@ -840,11 +841,21 @@ public void testBondsBetweenAltlocs() throws IOException { // b.getAtomA().toString() + ":" + b.getAtomA().getAltLoc() + " --- " + // b.getAtomB().toString() + ":" + b.getAtomB().getAltLoc()); // } + // no bonds between atoms with different alt locs assertEquals(b.getAtomA().toString() + " --- " + b.getAtomB().toString(), b.getAtomA().getAltLoc(), b.getAtomB().getAltLoc()); + + // a bond should exist between CE and HE3 but only for altloc=B + if ((b.getAtomA().getName().equals("CE") && b.getAtomB().getName().equals("HE3")) || + (b.getAtomA().getName().equals("HE3") && b.getAtomB().getName().equals("CE")) ) { + foundCEHE3bond = true; + } } } + // there should be a bond between CE and HE3 but only for altloc=B + assertTrue(foundCEHE3bond); + } } From 7c7caa274420e3399d4c969b0feb795979e6d71d Mon Sep 17 00:00:00 2001 From: Jose Duarte Date: Mon, 11 Nov 2019 21:29:52 -0800 Subject: [PATCH 3/8] Now all tests pass --- .../java/org/biojava/nbio/structure/io/BondMaker.java | 9 +++++++-- .../java/org/biojava/nbio/structure/TestAltLocs.java | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java index 0715cf4792..f829f22718 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java @@ -214,9 +214,14 @@ private void formIntraResidueBonds() { Atom b = getAtom(chemCompBond.getAtom_id_2(), group); if ( a != null && b != null){ - // if they are different altlocs there must be no bond - if (a.getAltLoc() != b.getAltLoc()) + // if they are different altlocs (when different from the '.' case) there must be no bond + if (a.getAltLoc() != null && b.getAltLoc()!=null && + a.getAltLoc()!=' ' && b.getAltLoc()!=' ' && + a.getAltLoc() != b.getAltLoc()) { + logger.debug("Skipping bond between atoms with differently named alt locs {} (altLoc '{}') -- {} (altLoc '{}')", + a.toString(), a.getAltLoc(), b.toString(), b.getAltLoc()); continue; + } int bondOrder = chemCompBond.getNumericalBondOrder(); logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}", diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java index 28db8cd8f0..c1e38280c6 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java @@ -838,8 +838,8 @@ public void testBondsBetweenAltlocs() throws IOException { for (Bond b : a.getBonds()) { // if (b.getAtomA().getAltLoc() != b.getAtomB().getAltLoc()) { // System.out.println( -// b.getAtomA().toString() + ":" + b.getAtomA().getAltLoc() + " --- " + -// b.getAtomB().toString() + ":" + b.getAtomB().getAltLoc()); +// b.getAtomA().toString() + ": '" + b.getAtomA().getAltLoc() + "' --- " + +// b.getAtomB().toString() + ": '" + b.getAtomB().getAltLoc() + "'"); // } // no bonds between atoms with different alt locs assertEquals(b.getAtomA().toString() + " --- " + b.getAtomB().toString(), From 0ff1823543a133e42e7f09e32003fca4a0fd894f Mon Sep 17 00:00:00 2001 From: Jose Duarte Date: Mon, 11 Nov 2019 21:39:04 -0800 Subject: [PATCH 4/8] Clarifying docs and some more cleanup --- .../java/org/biojava/nbio/structure/Atom.java | 3 +- .../org/biojava/nbio/structure/AtomImpl.java | 9 ------ .../biojava/nbio/structure/HetatomImpl.java | 30 +------------------ 3 files changed, 3 insertions(+), 39 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/Atom.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/Atom.java index ebd67c0a8c..7b1c722bd9 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/Atom.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/Atom.java @@ -163,7 +163,8 @@ public interface Atom extends Cloneable, PDBRecord { /** * Get alternate Location. - * @return a Character object representing the alt loc value + * @return a Character object representing the alt loc value. Default altLoc ('.' in mmCIF files) + * is represented by ' ' (space character, ascii 32). * @see #setAltLoc */ public Character getAltLoc(); diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java index a270b06983..36ccf49efb 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/AtomImpl.java @@ -157,10 +157,6 @@ public void setZ(double z) { @Override public double getZ() { return coords.z; } - /** - * Set alternate Location. - * @see #getAltLoc - */ @Override public void setAltLoc(Character c) { // after changing altLoc from Character to char, we do this to keep the interface the same as it used to be - JD 2016-01-27 @@ -170,11 +166,6 @@ public void setAltLoc(Character c) { altLoc = c ; } - /** - * Get alternate Location. - * @return a Character object representing the alt loc value - * @see #setAltLoc - */ @Override public Character getAltLoc() { // after changing altLoc from Character to char, we do this to keep the interface the same as it used to be - JD 2016-01-27 diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java index cc5356b942..037dcb2b0b 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/HetatomImpl.java @@ -254,16 +254,13 @@ public boolean hasAtom(String fullName) { Atom a = atomNameLookup.get(fullName.trim()); return a != null; } else { - /** This is the performance penalty we pay for NOT using the atomnameLookup in PerformanceBehaviour.LESS_MEMORY_SLOWER_PERFORMANCE - */ + // This is the performance penalty we pay for NOT using the atomnameLookup in PerformanceBehaviour.LESS_MEMORY_SLOWER_PERFORMANCE for (Atom a : atoms) { if (a.getName().equals(fullName)) { return true; } } return false; - - } } @@ -375,42 +372,21 @@ public void setProperties(Map props) { properties = props ; } - /** return properties. - * - * @return a HashMap object representing the properties value - * @see #setProperties - */ @Override public Map getProperties() { return properties ; } - /** set a single property . - * - * @see #getProperties - * @see #getProperty - */ @Override public void setProperty(String key, Object value){ properties.put(key,value); } - /** get a single property . - * @param key a String - * @return an Object - * @see #setProperty - * @see #setProperties - */ @Override public Object getProperty(String key){ return properties.get(key); } - - /** return an AtomIterator. - * - * @return an Iterator object - */ @Override public Iterator iterator() { return new AtomIterator(this); @@ -615,10 +591,6 @@ public boolean isWater() { return GroupType.WATERNAMES.contains(pdb_name); } - /** attempts to reduce the memory imprint of this group by trimming - * all internal Collection objects to the required size. - * - */ @Override public void trimToSize(){ From a29f88187b6871d21bfc5cab9918a95026ec4b28 Mon Sep 17 00:00:00 2001 From: Jose Duarte Date: Tue, 12 Nov 2019 17:01:37 -0800 Subject: [PATCH 5/8] Covering another alt loc bond edge case --- .../biojava/nbio/structure/io/BondMaker.java | 45 +++-- .../io/mmcif/SimpleMMcifConsumer.java | 4 +- .../biojava/nbio/structure/TestAltLocs.java | 167 +++++++++++++++++- 3 files changed, 200 insertions(+), 16 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java index f829f22718..3689f459e3 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java @@ -128,30 +128,51 @@ private void formPeptideBonds() { continue; } - Atom carboxylC; - Atom aminoN; + List carboxylCs = getAtoms(tail, "C"); + List aminoNs = getAtoms(head, "N"); - carboxylC = tail.getC(); - aminoN = head.getN(); - - - if (carboxylC == null || aminoN == null) { + if (carboxylCs.isEmpty() || aminoNs.isEmpty()) { // some structures may be incomplete and not store info // about all of their atoms - continue; } - - if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) { - new BondImpl(carboxylC, aminoN, 1); + for (Atom carboxylC:carboxylCs) { + for (Atom aminoN:aminoNs) { + if (carboxylC.getAltLoc() != null && aminoN.getAltLoc()!=null && + carboxylC.getAltLoc()!=' ' && aminoN.getAltLoc()!=' ' && + carboxylC.getAltLoc() != aminoN.getAltLoc()) { + logger.debug("Skipping peptide bond between atoms with differently named alt locs {} (altLoc '{}') -- {} (altLoc '{}')", + carboxylC.toString(), carboxylC.getAltLoc(), aminoN.toString(), aminoN.getAltLoc()); + continue; + } + if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) { + new BondImpl(carboxylC, aminoN, 1); + } + } } - } } } } + /** + * Get all atoms (including possible alt locs) in given group that are name with the given atom name + * @param g the group + * @param name the atom name + * @return list of all atoms, or empty list if no atoms with the name + */ + private List getAtoms(Group g, String name) { + List atoms = new ArrayList<>(); + List groupsWithAltLocs = new ArrayList<>(); + groupsWithAltLocs.add(g); + groupsWithAltLocs.addAll(g.getAltLocs()); + for (Group group : groupsWithAltLocs) { + atoms.add(group.getAtom(name)); + } + return atoms; + } + private void formNucleotideBonds() { for (int modelInd=0; modelInd bonds = new ArrayList<>(); + for (Bond b : catom.getBonds()) { + if (b.getAtomA().getName().equals("N") || b.getAtomB().getName().equals("N")) { + bonds.add(b); + } + } + + assertEquals(2, bonds.size()); + + Set seenAltLocs = new HashSet<>(); + for (Bond b : bonds) { + Atom aAtom = b.getAtomA(); + Atom bAtom = b.getAtomB(); + Atom nAtom; + if (aAtom.getName().equals("N")) { + nAtom = aAtom; + } else { + nAtom = bAtom; + } + seenAltLocs.add(nAtom.getAltLoc()); + } + // 2 distinct N atoms: alt loc A and B + assertEquals(2, seenAltLocs.size()); + assertTrue(seenAltLocs.contains('A')); + assertTrue(seenAltLocs.contains('B')); + + } + + } From b889140cee99cd5b1745c6b4c4059e487d576489 Mon Sep 17 00:00:00 2001 From: Jose Duarte Date: Tue, 12 Nov 2019 21:46:40 -0800 Subject: [PATCH 6/8] Extracted common method, now covering nucleotide bonds --- .../biojava/nbio/structure/io/BondMaker.java | 103 +++++++++--------- 1 file changed, 54 insertions(+), 49 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java index 3689f459e3..beed12e2e2 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java @@ -128,51 +128,12 @@ private void formPeptideBonds() { continue; } - List carboxylCs = getAtoms(tail, "C"); - List aminoNs = getAtoms(head, "N"); - - if (carboxylCs.isEmpty() || aminoNs.isEmpty()) { - // some structures may be incomplete and not store info - // about all of their atoms - continue; - } - - for (Atom carboxylC:carboxylCs) { - for (Atom aminoN:aminoNs) { - if (carboxylC.getAltLoc() != null && aminoN.getAltLoc()!=null && - carboxylC.getAltLoc()!=' ' && aminoN.getAltLoc()!=' ' && - carboxylC.getAltLoc() != aminoN.getAltLoc()) { - logger.debug("Skipping peptide bond between atoms with differently named alt locs {} (altLoc '{}') -- {} (altLoc '{}')", - carboxylC.toString(), carboxylC.getAltLoc(), aminoN.toString(), aminoN.getAltLoc()); - continue; - } - if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) { - new BondImpl(carboxylC, aminoN, 1); - } - } - } + formBondAltlocAware(tail, "C", head, "N", MAX_PEPTIDE_BOND_LENGTH, 1); } } } } - /** - * Get all atoms (including possible alt locs) in given group that are name with the given atom name - * @param g the group - * @param name the atom name - * @return list of all atoms, or empty list if no atoms with the name - */ - private List getAtoms(Group g, String name) { - List atoms = new ArrayList<>(); - List groupsWithAltLocs = new ArrayList<>(); - groupsWithAltLocs.add(g); - groupsWithAltLocs.addAll(g.getAltLocs()); - for (Group group : groupsWithAltLocs) { - atoms.add(group.getAtom(name)); - } - return atoms; - } - private void formNucleotideBonds() { for (int modelInd=0; modelInd a1s = getAtoms(g1, name1); + List a2s = getAtoms(g2, name2); - if (Calc.getDistance(phosphorous, oThreePrime) < MAX_NUCLEOTIDE_BOND_LENGTH) { - new BondImpl(phosphorous, oThreePrime, 1); - } + if (a1s.isEmpty() || a2s.isEmpty()) { + // some structures may be incomplete and not store info + // about all of their atoms + return; + } + for (Atom a1:a1s) { + for (Atom a2:a2s) { + if (a1.getAltLoc() != null && a2.getAltLoc()!=null && + a1.getAltLoc()!=' ' && a2.getAltLoc()!=' ' && + a1.getAltLoc() != a2.getAltLoc()) { + logger.debug("Skipping bond between atoms with differently named alt locs {} (altLoc '{}') -- {} (altLoc '{}')", + a1.toString(), a1.getAltLoc(), a2.toString(), a2.getAltLoc()); + continue; + } + if (Calc.getDistance(a1, a2) < maxAllowedLength) { + new BondImpl(a1, a2, bondOrder); } } } } + /** + * Get all atoms (including possible alt locs) in given group that are name with the given atom name + * @param g the group + * @param name the atom name + * @return list of all atoms, or empty list if no atoms with the name + */ + private List getAtoms(Group g, String name) { + List atoms = new ArrayList<>(); + List groupsWithAltLocs = new ArrayList<>(); + groupsWithAltLocs.add(g); + groupsWithAltLocs.addAll(g.getAltLocs()); + for (Group group : groupsWithAltLocs) { + Atom a = group.getAtom(name); + if (a!=null) + atoms.add(a); + } + return atoms; + } + private void formIntraResidueBonds() { for (int modelInd=0; modelInd Date: Tue, 12 Nov 2019 22:19:28 -0800 Subject: [PATCH 7/8] Consolidating on a single method call for all alt loc bond business --- .../biojava/nbio/structure/io/BondMaker.java | 127 ++++++++---------- 1 file changed, 57 insertions(+), 70 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java index beed12e2e2..bb92724aa4 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java @@ -159,14 +159,48 @@ private void formNucleotideBonds() { } } + private void formIntraResidueBonds() { + for (int modelInd=0; modelInd groups = chain.getAtomGroups(); + for (Group mainGroup : groups) { + // atoms with no residue number don't have atom information + if (mainGroup.getResidueNumber() == null) { + continue; + } + // Now add support for altLocGroup + List totList = new ArrayList(); + totList.add(mainGroup); + totList.addAll(mainGroup.getAltLocs()); + + // Now iterate through this list + for(Group group : totList){ + + ChemComp aminoChemComp = ChemCompGroupFactory.getChemComp(group.getPDBName()); + logger.debug("chemcomp for residue {}-{} has {} atoms and {} bonds", + group.getPDBName(), group.getResidueNumber(), aminoChemComp.getAtoms().size(), aminoChemComp.getBonds().size()); + + for (ChemCompBond chemCompBond : aminoChemComp.getBonds()) { + // note we don't check distance to make this call not too expensive + formBondAltlocAware(group, chemCompBond.getAtom_id_1(), + group, chemCompBond.getAtom_id_2(), -1, chemCompBond.getNumericalBondOrder()); + } + } + } + } + + } + } + /** * Form bond between atoms of the given names and groups, respecting alt loc rules to form bonds: - * no bonds between differently named alt locs (not default) and all bonds for default alt loc to named alt loc. + * no bonds between differently named alt locs (that are not the default alt loc '.') + * and multiple bonds for default alt loc to named alt loc. * @param g1 first group * @param name1 name of atom in first group * @param g2 second group * @param name2 name of atom in second group - * @param maxAllowedLength max length, if atoms distance above this length no bond will be added + * @param maxAllowedLength max length, if atoms distance above this length no bond will be added. If negative no check on distance is performed. * @param bondOrder the bond order to be set in the created bond(s) */ private void formBondAltlocAware(Group g1, String name1, Group g2, String name2, double maxAllowedLength, int bondOrder) { @@ -188,8 +222,20 @@ private void formBondAltlocAware(Group g1, String name1, Group g2, String name2, a1.toString(), a1.getAltLoc(), a2.toString(), a2.getAltLoc()); continue; } - if (Calc.getDistance(a1, a2) < maxAllowedLength) { + if (maxAllowedLength<0) { + // negative maxAllowedLength means we don't check distance and always add bond + logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}", + a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder); new BondImpl(a1, a2, bondOrder); + } else { + if (Calc.getDistance(a1, a2) < maxAllowedLength) { + logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}. Distance is below {}", + a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder, maxAllowedLength); + new BondImpl(a1, a2, bondOrder); + } else { + logger.debug("Not forming bond between atoms {}-{} and {}-{} with bond order {}, because distance is above {}", + a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder, maxAllowedLength); + } } } } @@ -208,77 +254,18 @@ private List getAtoms(Group g, String name) { groupsWithAltLocs.addAll(g.getAltLocs()); for (Group group : groupsWithAltLocs) { Atom a = group.getAtom(name); - if (a!=null) - atoms.add(a); - } - return atoms; - } - - private void formIntraResidueBonds() { - for (int modelInd=0; modelInd groups = chain.getAtomGroups(); - for (Group mainGroup : groups) { - // atoms with no residue number don't have atom information - if (mainGroup.getResidueNumber() == null) { - continue; - } - // Now add support for altLocGroup - List totList = new ArrayList(); - totList.add(mainGroup); - totList.addAll(mainGroup.getAltLocs()); - - // Now iterate through this list - for(Group group : totList){ - - ChemComp aminoChemComp = ChemCompGroupFactory.getChemComp(group.getPDBName()); - logger.debug("chemcomp for residue {}-{} has {} atoms and {} bonds", - group.getPDBName(), group.getResidueNumber(), aminoChemComp.getAtoms().size(), aminoChemComp.getBonds().size()); - - for (ChemCompBond chemCompBond : aminoChemComp.getBonds()) { - Atom a = getAtom(chemCompBond.getAtom_id_1(), group); - Atom b = getAtom(chemCompBond.getAtom_id_2(), group); - if ( a != null && b != null){ - - // if they are different altlocs (when different from the '.' case) there must be no bond - if (a.getAltLoc() != null && b.getAltLoc()!=null && - a.getAltLoc()!=' ' && b.getAltLoc()!=' ' && - a.getAltLoc() != b.getAltLoc()) { - logger.debug("Skipping bond between atoms with differently named alt locs {} (altLoc '{}') -- {} (altLoc '{}')", - a.toString(), a.getAltLoc(), b.toString(), b.getAltLoc()); - continue; - } - - int bondOrder = chemCompBond.getNumericalBondOrder(); - logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}", - a.getPDBserial(), a.getName(), b.getPDBserial(), b.getName(), bondOrder); - new BondImpl(a, b, bondOrder); - } - // Else: Some of the atoms were missing. That's fine, there's - // nothing to do in this case. - - } - } - } - } - - } - } - - private Atom getAtom(String atomId, Group group) { - Atom a = group.getAtom(atomId); - - // Check for deuteration - if(a==null && atomId.startsWith("H")) { - a = group.getAtom(atomId.replaceFirst("H", "D")); - // Check it is actually deuterated - if(a!=null){ - if(!a.getElement().equals(Element.D)){ + // Check for deuteration + if (a==null && name.startsWith("H")) { + a = group.getAtom(name.replaceFirst("H", "D")); + // Check it is actually deuterated + if (a!=null && !a.getElement().equals(Element.D)){ a=null; } } + if (a!=null) + atoms.add(a); } - return a; + return atoms; } private void trimBondLists() { From c3633966cba32a6b007025f29e0675459d9a30e6 Mon Sep 17 00:00:00 2001 From: Jose Duarte Date: Tue, 12 Nov 2019 22:26:28 -0800 Subject: [PATCH 8/8] Docs --- .../main/java/org/biojava/nbio/structure/Group.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/Group.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/Group.java index 6a169dbee5..f2bc5a506a 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/Group.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/Group.java @@ -103,12 +103,13 @@ public interface Group extends Serializable { /** * Set the atoms of this group. - * @see {@link Atom} + * @see Atom * @param atoms a list of atoms */ public void setAtoms(List atoms); - /** Remove all atoms from this group. + /** + * Remove all atoms from this group. * */ public void clearAtoms(); @@ -118,13 +119,14 @@ public interface Group extends Serializable { * Beware that some PDB atom names are ambiguous (e.g. CA, which means C-alpha or Calcium), * ambiguities should not occur within the same group though. To solve these ambiguities * one would need to check the atom returned for the required element with {@link Atom#getElement()} + *

+ * Note this method will return only the atom in the default alternative location (be it '.' or a letter). * * @param name a trimmed String representing the atom's PDB name, e.g. "CA" * @return an Atom object or null if no such atom exists within this group */ public Atom getAtom(String name) ; - - + /** * Get at atom by position. *