Skip to content

Commit c83ece2

Browse files
committed
Fix and test for issue #931
1 parent 7b76f8f commit c83ece2

File tree

3 files changed

+47
-31
lines changed

3 files changed

+47
-31
lines changed

biojava-structure/src/main/java/org/biojava/nbio/structure/io/EntityFinder.java

Lines changed: 21 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -238,39 +238,34 @@ private static boolean areResNumbersAligned(Chain c1, Chain c2) {
238238

239239
private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Chain>> polyModels) {
240240

241-
242-
243-
TreeMap<String, EntityInfo> chainIds2entities = new TreeMap<String,EntityInfo>();
241+
TreeMap<String, EntityInfo> chainIds2entities = new TreeMap<>();
244242

245243
if (polyModels.isEmpty()) return chainIds2entities;
246244

247-
Set<Integer> polyChainIndices = new TreeSet<Integer>();
245+
Set<Integer> polyChainIndices = new TreeSet<>();
248246
for (int i=0;i<polyModels.get(0).size();i++) {
249247
polyChainIndices.add(i);
250248
}
251249

252-
253250
int molId = 1;
254251

255252
outer:
256253
for (int i:polyChainIndices) {
254+
Chain c1 = polyModels.get(0).get(i);
255+
// here we use false, which means that X will be used for unknown compounds
256+
String str1 = SeqRes2AtomAligner.getFullAtomSequence(c1.getAtomGroups(), new HashMap<>(), false);
257+
257258
for (int j:polyChainIndices) {
258259

259260
if (j<=i) continue;
260261

261-
Chain c1 = polyModels.get(0).get(i);
262262
Chain c2 = polyModels.get(0).get(j);
263-
264-
Map<Integer,Integer> positionIndex1 = new HashMap<Integer, Integer>();
265-
Map<Integer,Integer> positionIndex2 = new HashMap<Integer, Integer>();
266-
// here we use false, which means that X will be used for unknown compounds
267-
String str1 = SeqRes2AtomAligner.getFullAtomSequence(c1.getAtomGroups(), positionIndex1, false);
268-
String str2 = SeqRes2AtomAligner.getFullAtomSequence(c2.getAtomGroups(), positionIndex2, false);
263+
String str2 = SeqRes2AtomAligner.getFullAtomSequence(c2.getAtomGroups(), new HashMap<>(), false);
269264

270265
int seq1Length = 0;
271266
int seq2Length = 0;
272267

273-
SequencePair<?,?> pair = null;
268+
SequencePair<?,?> pair;
274269
if (isProteinSequence(str1) && isProteinSequence(str2)) {
275270
ProteinSequence s1 = getProteinSequence(str1);
276271
ProteinSequence s2 = getProteinSequence(str2);
@@ -296,11 +291,10 @@ private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Ch
296291
pair = alignRNA(s1,s2);
297292

298293
} else {
299-
logger.debug("Chains {},{} are either different kind of polymers or could not be recognized as protein or nucleotide polymers");
294+
logger.debug("Chains {},{} are either different kind of polymers or could not be recognized as protein or nucleotide polymers", c1.getId(), c2.getId());
300295
continue;
301296
}
302297

303-
304298
int numGaps = getNumGaps(pair);
305299
int numGaps1 = getNumGapsQuery(pair);
306300
int numGaps2 = getNumGapsTarget(pair);
@@ -318,7 +312,7 @@ private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Ch
318312
if (identity > IDENTITY_THRESHOLD && gapCov1<GAP_COVERAGE_THRESHOLD && gapCov2<GAP_COVERAGE_THRESHOLD) {
319313
if ( !chainIds2entities.containsKey(c1.getId()) &&
320314
!chainIds2entities.containsKey(c2.getId())) {
321-
logger.debug("Creating Compound with chains {},{}",c1.getId(),c2.getId());
315+
logger.debug("Creating entity with chains {},{}",c1.getId(),c2.getId());
322316

323317
EntityInfo ent = new EntityInfo();
324318
ent.addChain(c1);
@@ -330,27 +324,24 @@ private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Ch
330324
chainIds2entities.put(c1.getId(), ent);
331325
chainIds2entities.put(c2.getId(), ent);
332326

333-
334327
} else {
328+
Chain chainToAdd;
335329
EntityInfo ent = chainIds2entities.get(c1.getId());
336-
337330
if (ent==null) {
338-
logger.debug("Adding chain {} to entity {}",c1.getId(),c2.getId());
339331
ent = chainIds2entities.get(c2.getId());
340-
ent.addChain(c1);
341-
c1.setEntityInfo(ent);
342-
chainIds2entities.put(c1.getId(), ent);
343-
332+
chainToAdd = c1;
344333
} else {
345-
logger.debug("Adding chain {} to entity {}",c2.getId(),c1.getId());
346-
ent.addChain(c2);
347-
c2.setEntityInfo(ent);
348-
chainIds2entities.put(c2.getId(), ent);
349-
334+
chainToAdd = c2;
335+
}
336+
if (!chainIds2entities.containsKey(chainToAdd.getId())) {
337+
logger.debug("Adding chain {} to entity {}", chainToAdd.getId(), ent.getId());
338+
ent.addChain(chainToAdd);
339+
chainToAdd.setEntityInfo(ent);
340+
chainIds2entities.put(chainToAdd.getId(), ent);
350341
}
351342
}
352343
if (!areResNumbersAligned(c1, c2)) {
353-
logger.warn("Including 100% identical chains {},{} in same Compound, although they have misaligned residue numbers",
344+
logger.warn("Including 100% identical chains {},{} in same entity, although they have misaligned residue numbers",
354345
c1.getId(),c2.getId());
355346
}
356347
}
@@ -370,7 +361,7 @@ private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Ch
370361
for (int i:polyChainIndices) {
371362
Chain c = polyModels.get(0).get(i);
372363
if (!chainIds2entities.containsKey(c.getId())) {
373-
logger.debug("Creating a 1-member Compound for chain {}",c.getId());
364+
logger.debug("Creating a 1-member entity for chain {}",c.getId());
374365

375366
EntityInfo ent = new EntityInfo();
376367
ent.addChain(c);
@@ -392,7 +383,6 @@ private static TreeMap<String,EntityInfo> findEntitiesFromAlignment(List<List<Ch
392383
}
393384
}
394385

395-
396386
return chainIds2entities;
397387
}
398388

biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestNonDepositedFiles.java

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,32 @@ public void testRefmacPdbFile() throws IOException {
300300

301301
}
302302

303+
/**
304+
* Making sure we find the right number of entities and that chains are assigned to entities correctly.
305+
* See https://github.com/biojava/biojava/issues/931
306+
*/
307+
@Test
308+
public void testIssue931() throws IOException {
309+
InputStream inStream = new GZIPInputStream(this.getClass().getResourceAsStream("/org/biojava/nbio/structure/io/3zyb_truncated.pdb.gz"));
310+
PDBFileParser pdbpars = new PDBFileParser();
311+
FileParsingParameters params = new FileParsingParameters();
312+
params.setAlignSeqRes(true);
313+
pdbpars.setFileParsingParameters(params);
314+
Structure s = pdbpars.parsePDBFile(inStream);
315+
316+
assertEquals(2, s.getEntityInfos().size());
317+
assertEquals(4, s.getEntityById(1).getChains().size());
318+
assertEquals(3, s.getEntityById(2).getChains().size());
319+
320+
assertSame(s.getEntityById(1), s.getPolyChains().get(0).getEntityInfo());
321+
assertSame(s.getEntityById(1), s.getPolyChains().get(1).getEntityInfo());
322+
assertSame(s.getEntityById(1), s.getPolyChains().get(2).getEntityInfo());
323+
assertSame(s.getEntityById(1), s.getPolyChains().get(3).getEntityInfo());
324+
assertSame(s.getEntityById(2), s.getPolyChains().get(4).getEntityInfo());
325+
assertSame(s.getEntityById(2), s.getPolyChains().get(5).getEntityInfo());
326+
assertSame(s.getEntityById(2), s.getPolyChains().get(6).getEntityInfo());
327+
}
328+
303329
/**
304330
* This test represents a common situation for a non-deposited structure.
305331
* When building with common crystallography software, the user often adds new
Binary file not shown.

0 commit comments

Comments
 (0)