Skip to content

Commit 8da9d07

Browse files
committed
Fixing a few bugs that arose after loadChemCompInfo removal, biojava#426
All tests pass now
1 parent 3e808d4 commit 8da9d07

6 files changed

Lines changed: 94 additions & 83 deletions

File tree

biojava-alignment/src/main/java/org/biojava/nbio/alignment/Alignments.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,6 @@ public static <S extends Sequence<C>, C extends Compound> PairwiseSequenceAligne
321321
S query, S target, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
322322
SubstitutionMatrix<C> subMatrix) {
323323
if (!query.getCompoundSet().equals(target.getCompoundSet())) {
324-
System.err.println(query.getCompoundSet().getClass().getName() + " != " + target.getCompoundSet().getClass().getName());
325324
throw new IllegalArgumentException("Sequence compound sets must be the same");
326325
}
327326
switch (type) {

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/io/TestLongPdbVsMmCifParsing.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ public void testVeryLongPdbVsMmCif() throws IOException, StructureException {
124124

125125
@Test
126126
public void testSingle() throws IOException, StructureException {
127-
testAll(Arrays.asList("1jnv"));
127+
testAll(Arrays.asList("1bcr"));
128128
}
129129

130130
@After
@@ -429,6 +429,12 @@ private void testSingleChain(Chain cPdb, Chain cCif) {
429429

430430

431431
assertEquals("failed for getAtomLength (chain "+chainId+"):",cPdb.getAtomLength(),cCif.getAtomLength());
432+
433+
// entries with polymers composed of all unknowns (giving only-X sequences) can't be aligned seqres-to-atom (for PDB files)
434+
// we've got to skip them because they won't have seqres groups
435+
// an example is 1jnv chain A
436+
437+
if (cPdb.getAtomSequence().matches("^X+$")) return;
432438

433439
// note for getSeqResLength to work one needs the setAlignSeqRes option in the parsers
434440

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/io/TestSeqResParsing.java

Lines changed: 38 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -24,57 +24,56 @@
2424
package org.biojava.nbio.structure.test.io;
2525

2626

27-
import junit.framework.TestCase;
27+
import java.io.IOException;
28+
2829
import org.biojava.nbio.structure.AminoAcid;
2930
import org.biojava.nbio.structure.Chain;
3031
import org.biojava.nbio.structure.Group;
3132
import org.biojava.nbio.structure.Structure;
33+
import org.biojava.nbio.structure.StructureException;
3234
import org.biojava.nbio.structure.align.util.AtomCache;
35+
import org.junit.Test;
3336
import org.biojava.nbio.structure.StructureIO;
3437

35-
public class TestSeqResParsing extends TestCase {
38+
import static org.junit.Assert.*;
39+
40+
public class TestSeqResParsing {
3641

37-
public void test11GS(){
42+
@Test
43+
public void test11GS() throws IOException, StructureException{
3844

3945
String pdbID = "11GS";
4046

4147
Structure s;
42-
try {
43-
AtomCache cache = new AtomCache();
44-
cache.getFileParsingParams().setAlignSeqRes(true);
45-
46-
StructureIO.setAtomCache(cache);
47-
48-
s = StructureIO.getStructure(pdbID);
49-
assertNotNull(s);
50-
assertTrue(s.getChains().size() > 0);
51-
Chain c = s.getChain(0);
52-
53-
assertTrue(c.getSeqResGroups().size() > 2);
54-
55-
Group first = c.getSeqResGroup(0);
56-
Group second = c.getSeqResGroup(1);
57-
Group third = c.getSeqResGroup(2);
58-
59-
assertTrue(first instanceof AminoAcid);
60-
assertTrue(second instanceof AminoAcid);
61-
assertTrue(third instanceof AminoAcid);
62-
63-
AminoAcid aafirst = (AminoAcid) first;
64-
AminoAcid aasecond = (AminoAcid)second;
65-
AminoAcid aathird = (AminoAcid) third;
66-
67-
assertTrue(aafirst.getRecordType().equals(AminoAcid.SEQRESRECORD));
68-
assertTrue(aasecond.getRecordType().equals(AminoAcid.SEQRESRECORD));
69-
assertTrue(aathird.getRecordType().equals(AminoAcid.ATOMRECORD));
70-
71-
72-
} catch (Exception e) {
73-
74-
e.printStackTrace();
75-
fail(e.getMessage());
76-
}
77-
48+
49+
AtomCache cache = new AtomCache();
50+
cache.getFileParsingParams().setAlignSeqRes(true);
51+
52+
StructureIO.setAtomCache(cache);
53+
54+
s = StructureIO.getStructure(pdbID);
55+
assertNotNull(s);
56+
assertTrue(s.getChains().size() > 0);
57+
Chain c = s.getChain(0);
58+
59+
assertTrue(c.getSeqResGroups().size() > 2);
60+
61+
Group first = c.getSeqResGroup(0);
62+
Group second = c.getSeqResGroup(1);
63+
Group third = c.getSeqResGroup(2);
64+
65+
assertTrue(first instanceof AminoAcid);
66+
assertTrue(second instanceof AminoAcid);
67+
assertTrue(third instanceof AminoAcid);
68+
69+
AminoAcid aafirst = (AminoAcid) first;
70+
AminoAcid aasecond = (AminoAcid)second;
71+
AminoAcid aathird = (AminoAcid) third;
72+
73+
assertEquals(AminoAcid.SEQRESRECORD, aafirst.getRecordType());
74+
assertEquals(AminoAcid.SEQRESRECORD, aasecond.getRecordType());
75+
assertEquals(AminoAcid.ATOMRECORD, aathird.getRecordType());
7876

7977
}
78+
8079
}

biojava-structure/src/main/java/org/biojava/nbio/structure/io/SeqRes2AtomAligner.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -585,8 +585,9 @@ private boolean alignProteinChains(List<Group> seqRes, List<Group> atomRes) {
585585
SequencePair<ProteinSequence, AminoAcidCompound> pair = smithWaterman.getPair();
586586

587587

588-
589-
if ( pair == null) {
588+
// sequences that are only X (e.g. 1jnv chain A) produced empty alignments, because nothing aligns to nothing and thus the local alignment is empty
589+
// to avoid those empty alignments we catch them here with pair.getLength()==0
590+
if ( pair == null || pair.getLength()==0) {
590591
logger.warn("Could not align protein sequences. ATOM and SEQRES groups will not be aligned.");
591592
logger.warn("Sequences: ");
592593
logger.warn(seq1);

biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java

Lines changed: 45 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,25 +1590,22 @@ public void newStructRefSeqDif(StructRefSeqDif sref) {
15901590
sequenceDifs.add(sref);
15911591
}
15921592

1593-
private static Chain getChainFromList(List<Chain> chains, String name){
1594-
for (Chain chain : chains) {
1595-
if ( chain.getChainID().equals(name)){
1593+
private Chain getEntityChain(String entity_id){
1594+
1595+
for (Chain chain : entityChains) {
1596+
if ( chain.getChainID().equals(entity_id)){
15961597

15971598
return chain;
15981599
}
15991600
}
16001601
// does not exist yet, so create...
16011602

16021603
Chain chain = new ChainImpl();
1603-
chain.setChainID(name);
1604-
chains.add(chain);
1604+
chain.setChainID(entity_id);
1605+
entityChains.add(chain);
16051606

16061607
return chain;
1607-
}
1608-
1609-
private Chain getEntityChain(String entity_id){
16101608

1611-
return getChainFromList(entityChains,entity_id);
16121609
}
16131610

16141611
//private Chain getSeqResChain(String chainID){
@@ -1646,7 +1643,8 @@ public void newEntitySrcSyn(EntitySrcSyn entitySrcSyn){
16461643
entitySrcSyns.add(entitySrcSyn);
16471644
}
16481645

1649-
/** The EntityPolySeq object provide the amino acid sequence objects for the Entities.
1646+
/**
1647+
* The EntityPolySeq object provides the amino acid sequence objects for the Entities.
16501648
* Later on the entities are mapped to the BioJava Chain and Compound objects.
16511649
* @param epolseq the EntityPolySeq record for one amino acid
16521650
*/
@@ -1671,40 +1669,48 @@ public void newEntityPolySeq(EntityPolySeq epolseq) {
16711669
Chain entityChain = getEntityChain(epolseq.getEntity_id());
16721670

16731671

1674-
// create group from epolseq;
1675-
// by default this are the SEQRES records...
1676-
1677-
1678-
if (epolseq.getMon_id().length()==3 && StructureTools.get1LetterCodeAmino(epolseq.getMon_id())!=null){
1679-
AminoAcid g = new AminoAcidImpl();
1680-
1681-
g.setRecordType(AminoAcid.SEQRESRECORD);
1682-
1683-
g.setPDBName(epolseq.getMon_id());
1684-
1685-
Character code1 = StructureTools.get1LetterCodeAmino(epolseq.getMon_id());
1686-
g.setAminoType(code1);
1672+
// first we check through the chemcomp provider, if it fails we do some heuristics to guess the type of group
1673+
// TODO some of this code is analogous to getNewGroup() and we should try to unify them - JD 2016-03-08
1674+
1675+
Group g = ChemCompGroupFactory.getGroupFromChemCompDictionary(epolseq.getMon_id());
1676+
//int seqId = Integer.parseInt(epolseq.getNum());
1677+
if ( g != null && !g.getChemComp().isEmpty()) {
1678+
if ( g instanceof AminoAcidImpl) {
1679+
AminoAcidImpl aa = (AminoAcidImpl) g;
1680+
aa.setRecordType(AminoAcid.SEQRESRECORD);
1681+
//aa.setId(seqId);
1682+
}
1683+
} else {
16871684

1688-
g.setResidueNumber(ResidueNumber.fromString(epolseq.getNum()));
1689-
// ARGH at this stage we don't know about insertion codes
1690-
// this has to be obtained from _pdbx_poly_seq_scheme
1691-
entityChain.addGroup(g);
1685+
if (epolseq.getMon_id().length()==3 && StructureTools.get1LetterCodeAmino(epolseq.getMon_id())!=null){
1686+
AminoAcidImpl a = new AminoAcidImpl();
1687+
a.setRecordType(AminoAcid.SEQRESRECORD);
1688+
Character code1 = StructureTools.get1LetterCodeAmino(epolseq.getMon_id());
1689+
a.setAminoType(code1);
1690+
g = a;
1691+
1692+
} else if ( StructureTools.isNucleotide(epolseq.getMon_id())) {
1693+
// the group is actually a nucleotide group...
1694+
NucleotideImpl n = new NucleotideImpl();
1695+
g = n;
1696+
1697+
} else {
1698+
logger.debug("Residue {} {} is not a standard aminoacid or nucleotide, will create a het group for it", epolseq.getNum(),epolseq.getMon_id());
1699+
HetatomImpl h = new HetatomImpl();
1700+
g = h;
16921701

1693-
} else if ( StructureTools.isNucleotide(epolseq.getMon_id())) {
1694-
// the group is actually a nucleotide group...
1695-
NucleotideImpl n = new NucleotideImpl();
1702+
}
16961703

1697-
n.setResidueNumber(ResidueNumber.fromString(epolseq.getNum()));
1698-
n.setPDBName(epolseq.getMon_id());
1699-
entityChain.addGroup(n);
1700-
} else {
1701-
logger.debug("Residue {} {} is not a standard aminoacid or nucleotide, will create a het group for it", epolseq.getNum(),epolseq.getMon_id());
1702-
HetatomImpl h = new HetatomImpl();
1703-
h.setPDBName(epolseq.getMon_id());
1704-
h.setResidueNumber(ResidueNumber.fromString(epolseq.getNum()));
1705-
entityChain.addGroup(h);
17061704

17071705
}
1706+
// at this stage we don't know about author residue numbers (insertion codes)
1707+
// for now we abuse the ResidueNumber field by setting it to the internal residue numbers (label_seq_id, which are strictly sequential and follow the seqres sequence 1 to n)
1708+
// later the actual ResidueNumbers (author residue numbers) have to be corrected in alignSeqRes()
1709+
g.setResidueNumber(ResidueNumber.fromString(epolseq.getNum()));
1710+
1711+
g.setPDBName(epolseq.getMon_id());
1712+
1713+
entityChain.addGroup(g);
17081714

17091715
}
17101716

biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestHeaderOnly.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
public class TestHeaderOnly {
2525

26-
final String pdbID = "1REP";
26+
private final String pdbID = "1REP";
2727

2828
/**
2929
* All groups are expected to be empty.

0 commit comments

Comments
 (0)