Skip to content

Commit 01180f4

Browse files
authored
Merge pull request biojava#938 from biojava/issue931
Fix for issue 931
2 parents c804c41 + 8c9ce2c commit 01180f4

3 files changed

Lines changed: 50 additions & 44 deletions

File tree

biojava-structure/src/main/java/org/biojava/nbio/structure/io/EntityFinder.java

Lines changed: 24 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
import java.util.Comparator;
4444
import java.util.HashMap;
4545
import java.util.List;
46-
import java.util.Map;
4746
import java.util.Set;
4847
import java.util.TreeMap;
4948
import java.util.TreeSet;
@@ -91,9 +90,7 @@ public class EntityFinder {
9190
public static List<EntityInfo> findPolyEntities(List<List<Chain>> polyModels) {
9291
TreeMap<String,EntityInfo> chainIds2entities = findEntitiesFromAlignment(polyModels);
9392

94-
List<EntityInfo> entities = findUniqueEntities(chainIds2entities);
95-
96-
return entities;
93+
return findUniqueEntities(chainIds2entities);
9794
}
9895

9996
/**
@@ -102,7 +99,7 @@ public static List<EntityInfo> findPolyEntities(List<List<Chain>> polyModels) {
10299
*/
103100
private static List<EntityInfo> findUniqueEntities(TreeMap<String,EntityInfo> chainIds2entities) {
104101

105-
List<EntityInfo> list = new ArrayList<EntityInfo>();
102+
List<EntityInfo> list = new ArrayList<>();
106103

107104
for (EntityInfo cluster:chainIds2entities.values()) {
108105
boolean present = false;
@@ -131,12 +128,7 @@ public static void createPurelyNonPolyEntities(List<List<Chain>> nonPolyModels,
131128
// let's find first the max entity id to assign entity ids to the newly found entities
132129
int maxMolId = 0;
133130
if (!entities.isEmpty()) {
134-
maxMolId = Collections.max(entities, new Comparator<EntityInfo>() {
135-
@Override
136-
public int compare(EntityInfo o1, EntityInfo o2) {
137-
return new Integer(o1.getMolId()).compareTo(o2.getMolId());
138-
}
139-
}).getMolId();
131+
maxMolId = Collections.max(entities, Comparator.comparingInt(EntityInfo::getMolId)).getMolId();
140132
}
141133
// we go one over the max
142134
int molId = maxMolId + 1;
@@ -181,7 +173,6 @@ public int compare(EntityInfo o1, EntityInfo o2) {
181173

182174
}
183175

184-
185176
}
186177

187178
private static EntityInfo findNonPolyEntityWithDescription(String description, List<EntityInfo> nonPolyEntities) {
@@ -221,7 +212,6 @@ private static boolean areResNumbersAligned(Chain c1, Chain c2) {
221212
} catch (StructureException e) {
222213
// the group doesn't exist (no density) in the chain, go on
223214
countNonExisting++;
224-
continue;
225215
}
226216
}
227217

@@ -238,39 +228,34 @@ private static boolean areResNumbersAligned(Chain c1, Chain c2) {
238228

239229
private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Chain>> polyModels) {
240230

241-
242-
243-
TreeMap<String, EntityInfo> chainIds2entities = new TreeMap<String,EntityInfo>();
231+
TreeMap<String, EntityInfo> chainIds2entities = new TreeMap<>();
244232

245233
if (polyModels.isEmpty()) return chainIds2entities;
246234

247-
Set<Integer> polyChainIndices = new TreeSet<Integer>();
235+
Set<Integer> polyChainIndices = new TreeSet<>();
248236
for (int i=0;i<polyModels.get(0).size();i++) {
249237
polyChainIndices.add(i);
250238
}
251239

252-
253240
int molId = 1;
254241

255242
outer:
256243
for (int i:polyChainIndices) {
244+
Chain c1 = polyModels.get(0).get(i);
245+
// here we use false, which means that X will be used for unknown compounds
246+
String str1 = SeqRes2AtomAligner.getFullAtomSequence(c1.getAtomGroups(), new HashMap<>(), false);
247+
257248
for (int j:polyChainIndices) {
258249

259250
if (j<=i) continue;
260251

261-
Chain c1 = polyModels.get(0).get(i);
262252
Chain c2 = polyModels.get(0).get(j);
263-
264-
Map<Integer,Integer> positionIndex1 = new HashMap<Integer, Integer>();
265-
Map<Integer,Integer> positionIndex2 = new HashMap<Integer, Integer>();
266-
// here we use false, which means that X will be used for unknown compounds
267-
String str1 = SeqRes2AtomAligner.getFullAtomSequence(c1.getAtomGroups(), positionIndex1, false);
268-
String str2 = SeqRes2AtomAligner.getFullAtomSequence(c2.getAtomGroups(), positionIndex2, false);
253+
String str2 = SeqRes2AtomAligner.getFullAtomSequence(c2.getAtomGroups(), new HashMap<>(), false);
269254

270255
int seq1Length = 0;
271256
int seq2Length = 0;
272257

273-
SequencePair<?,?> pair = null;
258+
SequencePair<?,?> pair;
274259
if (isProteinSequence(str1) && isProteinSequence(str2)) {
275260
ProteinSequence s1 = getProteinSequence(str1);
276261
ProteinSequence s2 = getProteinSequence(str2);
@@ -296,11 +281,10 @@ private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Ch
296281
pair = alignRNA(s1,s2);
297282

298283
} else {
299-
logger.debug("Chains {},{} are either different kind of polymers or could not be recognized as protein or nucleotide polymers");
284+
logger.debug("Chains {},{} are either different kind of polymers or could not be recognized as protein or nucleotide polymers", c1.getId(), c2.getId());
300285
continue;
301286
}
302287

303-
304288
int numGaps = getNumGaps(pair);
305289
int numGaps1 = getNumGapsQuery(pair);
306290
int numGaps2 = getNumGapsTarget(pair);
@@ -318,7 +302,7 @@ private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Ch
318302
if (identity > IDENTITY_THRESHOLD && gapCov1<GAP_COVERAGE_THRESHOLD && gapCov2<GAP_COVERAGE_THRESHOLD) {
319303
if ( !chainIds2entities.containsKey(c1.getId()) &&
320304
!chainIds2entities.containsKey(c2.getId())) {
321-
logger.debug("Creating Compound with chains {},{}",c1.getId(),c2.getId());
305+
logger.debug("Creating entity with chains {},{}",c1.getId(),c2.getId());
322306

323307
EntityInfo ent = new EntityInfo();
324308
ent.addChain(c1);
@@ -330,27 +314,24 @@ private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Ch
330314
chainIds2entities.put(c1.getId(), ent);
331315
chainIds2entities.put(c2.getId(), ent);
332316

333-
334317
} else {
318+
Chain chainToAdd;
335319
EntityInfo ent = chainIds2entities.get(c1.getId());
336-
337320
if (ent==null) {
338-
logger.debug("Adding chain {} to entity {}",c1.getId(),c2.getId());
339321
ent = chainIds2entities.get(c2.getId());
340-
ent.addChain(c1);
341-
c1.setEntityInfo(ent);
342-
chainIds2entities.put(c1.getId(), ent);
343-
322+
chainToAdd = c1;
344323
} else {
345-
logger.debug("Adding chain {} to entity {}",c2.getId(),c1.getId());
346-
ent.addChain(c2);
347-
c2.setEntityInfo(ent);
348-
chainIds2entities.put(c2.getId(), ent);
349-
324+
chainToAdd = c2;
325+
}
326+
if (!chainIds2entities.containsKey(chainToAdd.getId())) {
327+
logger.debug("Adding chain {} to entity {}", chainToAdd.getId(), ent.getId());
328+
ent.addChain(chainToAdd);
329+
chainToAdd.setEntityInfo(ent);
330+
chainIds2entities.put(chainToAdd.getId(), ent);
350331
}
351332
}
352333
if (!areResNumbersAligned(c1, c2)) {
353-
logger.warn("Including 100% identical chains {},{} in same Compound, although they have misaligned residue numbers",
334+
logger.warn("Including 100% identical chains {},{} in same entity, although they have misaligned residue numbers",
354335
c1.getId(),c2.getId());
355336
}
356337
}
@@ -370,7 +351,7 @@ private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Ch
370351
for (int i:polyChainIndices) {
371352
Chain c = polyModels.get(0).get(i);
372353
if (!chainIds2entities.containsKey(c.getId())) {
373-
logger.debug("Creating a 1-member Compound for chain {}",c.getId());
354+
logger.debug("Creating a 1-member entity for chain {}",c.getId());
374355

375356
EntityInfo ent = new EntityInfo();
376357
ent.addChain(c);
@@ -392,7 +373,6 @@ private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Ch
392373
}
393374
}
394375

395-
396376
return chainIds2entities;
397377
}
398378

biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestNonDepositedFiles.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,32 @@ public void testRefmacPdbFile() throws IOException {
300300

301301
}
302302

303+
/**
304+
* Making sure we find the right number of entities and that chains are assigned to entities correctly.
305+
* See https://github.com/biojava/biojava/issues/931
306+
*/
307+
@Test
308+
public void testIssue931() throws IOException {
309+
InputStream inStream = new GZIPInputStream(this.getClass().getResourceAsStream("/org/biojava/nbio/structure/io/3zyb_truncated.pdb.gz"));
310+
PDBFileParser pdbpars = new PDBFileParser();
311+
FileParsingParameters params = new FileParsingParameters();
312+
params.setAlignSeqRes(true);
313+
pdbpars.setFileParsingParameters(params);
314+
Structure s = pdbpars.parsePDBFile(inStream);
315+
316+
assertEquals(2, s.getEntityInfos().size());
317+
assertEquals(4, s.getEntityById(1).getChains().size());
318+
assertEquals(3, s.getEntityById(2).getChains().size());
319+
320+
assertSame(s.getEntityById(1), s.getPolyChains().get(0).getEntityInfo());
321+
assertSame(s.getEntityById(1), s.getPolyChains().get(1).getEntityInfo());
322+
assertSame(s.getEntityById(1), s.getPolyChains().get(2).getEntityInfo());
323+
assertSame(s.getEntityById(1), s.getPolyChains().get(3).getEntityInfo());
324+
assertSame(s.getEntityById(2), s.getPolyChains().get(4).getEntityInfo());
325+
assertSame(s.getEntityById(2), s.getPolyChains().get(5).getEntityInfo());
326+
assertSame(s.getEntityById(2), s.getPolyChains().get(6).getEntityInfo());
327+
}
328+
303329
/**
304330
* This test represents a common situation for a non-deposited structure.
305331
* When building with common crystallography software, the user often adds new
Binary file not shown.

0 commit comments

Comments
 (0)