Skip to content

Commit 96d8baa

Browse files
committed
Moving the utility methods that detect the type of a Chain to the Chain interface level (they used to be in StructureTools).
1 parent 8307f59 commit 96d8baa

3 files changed

Lines changed: 151 additions & 72 deletions

File tree

biojava-structure/src/main/java/org/biojava/nbio/structure/Chain.java

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
import org.biojava.nbio.core.sequence.template.Sequence;
2727
import org.biojava.nbio.structure.io.FileParsingParameters;
28+
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
2829

2930
import java.util.List;
3031

@@ -277,7 +278,7 @@ public interface Chain {
277278
/**
278279
* Returns the sequence of amino acids as it has been provided in the ATOM records.
279280
* Non-standard residues will be present in the string only if the property
280-
* {@value PDBFileReader.LOAD_CHEM_COMP_PROPERTY} has been set.
281+
* {@value org.biojava.nbio.structure.io.PDBFileReader#LOAD_CHEM_COMP_PROPERTY} has been set.
281282
* @return amino acid sequence as string
282283
* @see #getSeqResSequence()
283284
*/
@@ -413,5 +414,53 @@ public interface Chain {
413414
* @return
414415
* @see EntityType
415416
*/
416-
EntityType getEntityType();
417+
EntityType getEntityType();
418+
419+
/** Tests if this chain consists of water molecules only.
420+
*
421+
* @return true if there are only solvent molecules in this chain.
422+
*/
423+
public boolean isWaterOnly();
424+
425+
/** Returns true if this chain is composed of non-polymeric (including water) groups only.
426+
*
427+
* @return true if only non-polymeric groups in this chain.
428+
*/
429+
public boolean isPureNonPolymer();
430+
431+
/**
432+
* Get the predominant {@link GroupType} for a given Chain, following these
433+
* rules: <li>if the ratio of number of residues of a certain
434+
* {@link GroupType} to total non-water residues is above the threshold
435+
* {@value org.biojava.nbio.structure.StructureTools#RATIO_RESIDUES_TO_TOTAL}, then that {@link GroupType} is
436+
* returned</li> <li>if there is no {@link GroupType} that is above the
437+
* threshold then the {@link GroupType} with most members is chosen, logging
438+
* it</li>
439+
* <p>
440+
* See also {@link ChemComp#getPolymerType()} and
441+
* {@link ChemComp#getResidueType()} which follow the PDB chemical component
442+
* dictionary and provide a much more accurate description of groups and
443+
* their linking.
444+
* </p>
445+
*
446+
* @return the predominant {@link GroupType} of this chain, determined by the rules above
447+
*/
448+
public GroupType getPredominantGroupType();
449+
450+
/**
451+
* Tells whether this chain is a protein chain.
452+
*
453+
454+
* @return true if protein, false if nucleotide or ligand
455+
* @see #getPredominantGroupType()
456+
*/
457+
public boolean isProtein();
458+
459+
/**
460+
* Tells whether this chain is DNA or RNA.
461+
*
462+
* @return true if nucleic acid, false if protein or ligand
463+
* @see #getPredominantGroupType()
464+
*/
465+
public boolean isNucleicAcid();
417466
}

biojava-structure/src/main/java/org/biojava/nbio/structure/ChainImpl.java

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,99 @@ public EntityType getEntityType() {
728728
if (getEntityInfo()==null) return null;
729729
return getEntityInfo().getType();
730730
}
731-
731+
732+
@Override
733+
public boolean isWaterOnly() {
734+
for (Group g : getAtomGroups()) {
735+
if (!g.isWater())
736+
return false;
737+
}
738+
return true;
739+
}
740+
741+
@Override
742+
public boolean isPureNonPolymer() {
743+
for (Group g : getAtomGroups()) {
744+
745+
ChemComp cc = g.getChemComp();
746+
747+
if ( g.isPolymeric() &&
748+
!g.isHetAtomInFile() ) {
749+
750+
// important: the aminoacid or nucleotide residue can be in Atom records
751+
752+
return false;
753+
}
754+
755+
}
756+
return true;
757+
}
758+
759+
@Override
760+
public GroupType getPredominantGroupType(){
761+
762+
double RATIO_RESIDUES_TO_TOTAL = StructureTools.RATIO_RESIDUES_TO_TOTAL;
763+
764+
int sizeAminos = getAtomGroups(GroupType.AMINOACID).size();
765+
int sizeNucleotides = getAtomGroups(GroupType.NUCLEOTIDE).size();
766+
List<Group> hetAtoms = getAtomGroups(GroupType.HETATM);
767+
int sizeHetatoms = hetAtoms.size();
768+
int sizeWaters = 0;
769+
for (Group g : hetAtoms) {
770+
if (g.isWater())
771+
sizeWaters++;
772+
}
773+
int sizeHetatomsWithoutWater = sizeHetatoms - sizeWaters;
774+
775+
int fullSize = sizeAminos + sizeNucleotides + sizeHetatomsWithoutWater;
776+
777+
if ((double) sizeAminos / (double) fullSize > StructureTools.RATIO_RESIDUES_TO_TOTAL)
778+
return GroupType.AMINOACID;
779+
780+
if ((double) sizeNucleotides / (double) fullSize > RATIO_RESIDUES_TO_TOTAL)
781+
return GroupType.NUCLEOTIDE;
782+
783+
if ((double) (sizeHetatomsWithoutWater) / (double) fullSize > RATIO_RESIDUES_TO_TOTAL)
784+
return GroupType.HETATM;
785+
786+
// finally if neither condition works, we try based on majority, but log
787+
// it
788+
GroupType max;
789+
if (sizeNucleotides > sizeAminos) {
790+
if (sizeNucleotides > sizeHetatomsWithoutWater) {
791+
max = GroupType.NUCLEOTIDE;
792+
} else {
793+
max = GroupType.HETATM;
794+
}
795+
} else {
796+
if (sizeAminos > sizeHetatomsWithoutWater) {
797+
max = GroupType.AMINOACID;
798+
} else {
799+
max = GroupType.HETATM;
800+
}
801+
}
802+
logger.debug(
803+
"Ratio of residues to total for chain with asym_id {} is below {}. Assuming it is a {} chain. "
804+
+ "Counts: # aa residues: {}, # nuc residues: {}, # non-water het residues: {}, # waters: {}, "
805+
+ "ratio aa/total: {}, ratio nuc/total: {}",
806+
getId(), RATIO_RESIDUES_TO_TOTAL, max, sizeAminos,
807+
sizeNucleotides, sizeHetatomsWithoutWater, sizeWaters,
808+
(double) sizeAminos / (double) fullSize,
809+
(double) sizeNucleotides / (double) fullSize);
810+
811+
return max;
812+
}
813+
814+
@Override
815+
public boolean isProtein() {
816+
return getPredominantGroupType() == GroupType.AMINOACID;
817+
}
818+
819+
@Override
820+
public boolean isNucleicAcid() {
821+
return getPredominantGroupType() == GroupType.NUCLEOTIDE;
822+
}
823+
824+
732825
}
733826

biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java

Lines changed: 6 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,7 +1688,8 @@ public static Structure getStructure(String name, PDBFileParser parser,
16881688
* @see #getPredominantGroupType(Chain)
16891689
*/
16901690
public static boolean isProtein(Chain c) {
1691-
return getPredominantGroupType(c) == GroupType.AMINOACID;
1691+
1692+
return c.isProtein();
16921693
}
16931694

16941695
/**
@@ -1699,7 +1700,7 @@ public static boolean isProtein(Chain c) {
16991700
* @see #getPredominantGroupType(Chain)
17001701
*/
17011702
public static boolean isNucleicAcid(Chain c) {
1702-
return getPredominantGroupType(c) == GroupType.NUCLEOTIDE;
1703+
return c.isNucleicAcid();
17031704
}
17041705

17051706
/**
@@ -1721,54 +1722,7 @@ public static boolean isNucleicAcid(Chain c) {
17211722
* @return
17221723
*/
17231724
public static GroupType getPredominantGroupType(Chain c) {
1724-
int sizeAminos = c.getAtomGroups(GroupType.AMINOACID).size();
1725-
int sizeNucleotides = c.getAtomGroups(GroupType.NUCLEOTIDE).size();
1726-
List<Group> hetAtoms = c.getAtomGroups(GroupType.HETATM);
1727-
int sizeHetatoms = hetAtoms.size();
1728-
int sizeWaters = 0;
1729-
for (Group g : hetAtoms) {
1730-
if (g.isWater())
1731-
sizeWaters++;
1732-
}
1733-
int sizeHetatomsWithoutWater = sizeHetatoms - sizeWaters;
1734-
1735-
int fullSize = sizeAminos + sizeNucleotides + sizeHetatomsWithoutWater;
1736-
1737-
if ((double) sizeAminos / (double) fullSize > RATIO_RESIDUES_TO_TOTAL)
1738-
return GroupType.AMINOACID;
1739-
1740-
if ((double) sizeNucleotides / (double) fullSize > RATIO_RESIDUES_TO_TOTAL)
1741-
return GroupType.NUCLEOTIDE;
1742-
1743-
if ((double) (sizeHetatomsWithoutWater) / (double) fullSize > RATIO_RESIDUES_TO_TOTAL)
1744-
return GroupType.HETATM;
1745-
1746-
// finally if neither condition works, we try based on majority, but log
1747-
// it
1748-
GroupType max;
1749-
if (sizeNucleotides > sizeAminos) {
1750-
if (sizeNucleotides > sizeHetatomsWithoutWater) {
1751-
max = GroupType.NUCLEOTIDE;
1752-
} else {
1753-
max = GroupType.HETATM;
1754-
}
1755-
} else {
1756-
if (sizeAminos > sizeHetatomsWithoutWater) {
1757-
max = GroupType.AMINOACID;
1758-
} else {
1759-
max = GroupType.HETATM;
1760-
}
1761-
}
1762-
logger.debug(
1763-
"Ratio of residues to total for chain with asym_id {} is below {}. Assuming it is a {} chain. "
1764-
+ "Counts: # aa residues: {}, # nuc residues: {}, # non-water het residues: {}, # waters: {}, "
1765-
+ "ratio aa/total: {}, ratio nuc/total: {}",
1766-
c.getId(), RATIO_RESIDUES_TO_TOTAL, max, sizeAminos,
1767-
sizeNucleotides, sizeHetatomsWithoutWater, sizeWaters,
1768-
(double) sizeAminos / (double) fullSize,
1769-
(double) sizeNucleotides / (double) fullSize);
1770-
1771-
return max;
1725+
return c.getPredominantGroupType();
17721726
}
17731727

17741728
/**
@@ -1778,11 +1732,7 @@ public static GroupType getPredominantGroupType(Chain c) {
17781732
* @return
17791733
*/
17801734
public static boolean isChainWaterOnly(Chain c) {
1781-
for (Group g : c.getAtomGroups()) {
1782-
if (!g.isWater())
1783-
return false;
1784-
}
1785-
return true;
1735+
return c.isWaterOnly();
17861736
}
17871737

17881738
/**
@@ -1794,20 +1744,7 @@ public static boolean isChainWaterOnly(Chain c) {
17941744
*/
17951745
public static boolean isChainPureNonPolymer(Chain c) {
17961746

1797-
for (Group g : c.getAtomGroups()) {
1798-
1799-
ChemComp cc = g.getChemComp();
1800-
1801-
if ( g.isPolymeric() &&
1802-
!g.isHetAtomInFile() ) {
1803-
1804-
// important: the aminoacid or nucleotide residue can be in Atom records
1805-
1806-
return false;
1807-
}
1808-
1809-
}
1810-
return true;
1747+
return c.isPureNonPolymer();
18111748
}
18121749

18131750
/**

0 commit comments

Comments
 (0)