Skip to content

Commit 2a8487d

Browse files
committed
Implement the structural distance in phylo
1 parent fea2b4c commit 2a8487d

File tree

2 files changed

+50
-8
lines changed

2 files changed

+50
-8
lines changed

biojava-phylo/src/main/java/demo/DemoDistanceTree.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public static void main(String[] args) throws Exception {
5858

5959
// 1. Calculate the evolutionary distance matrix (can take long)
6060
SubstitutionMatrix<AminoAcidCompound> M = SubstitutionMatrixHelper.getBlosum62();
61-
DistanceMatrix DM = DistanceMatrixCalculator.fractionalDissimilarityScore(msa, M);
61+
DistanceMatrix DM = DistanceMatrixCalculator.fractionalDissimilarity(msa);
6262

6363
// 2. Construct a distance tree using the NJ algorithm
6464
Phylogeny phylo = TreeConstructor.distanceTree(

biojava-phylo/src/main/java/org/biojava/nbio/phylo/DistanceMatrixCalculator.java

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
import org.biojava.nbio.alignment.Alignments;
66
import org.biojava.nbio.alignment.Alignments.PairwiseSequenceScorerType;
77
import org.biojava.nbio.alignment.SimpleGapPenalty;
8+
import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
89
import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
910
import org.biojava.nbio.core.sequence.MultipleSequenceAlignment;
11+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
1012
import org.biojava.nbio.core.sequence.template.Compound;
1113
import org.biojava.nbio.core.sequence.template.Sequence;
1214
import org.biojava.nbio.core.util.ConcurrencyTools;
@@ -227,7 +229,7 @@ public static <C extends Sequence<D>, D extends Compound> DistanceMatrix fractio
227229
msa.getAlignedSequences(),
228230
PairwiseSequenceScorerType.GLOBAL_SIMILARITIES,
229231
new SimpleGapPenalty(0, 0), M);
230-
232+
231233
ConcurrencyTools.shutdown();
232234

233235
// The maximum score that can be obtained for this alignment
@@ -373,24 +375,64 @@ public static <C extends Sequence<D>, D extends Compound> DistanceMatrix dissimi
373375
*/
374376
public static <C extends Sequence<D>, D extends Compound> DistanceMatrix pamDistance(
375377
MultipleSequenceAlignment<C, D> msa) {
376-
// TODO
378+
379+
// Need to import PAM1 matrix to biojava TODO
380+
SubstitutionMatrix<AminoAcidCompound> PAM1 = SubstitutionMatrixHelper
381+
.getPAM250();
382+
377383
return null;
378384
}
379385

380386
/**
381387
* The structural distance (d<sub>S</sub>) uses the structural similarity
382388
* (or dissimilarity) from a the structural alignment of two protein
383-
* strutures. It is based on the diffusive model for protein fold evolution.
384-
* The structural deviations are captured as RMS deviations.
389+
* strutures. It is based on the diffusive model for protein fold evolution
390+
* (Grishin 1995). The structural deviations are captured as RMS deviations.
391+
*
392+
* <pre>
393+
* d<sub>Sij</sub> = (rmsd<sub>max</sub><sup>2</sup> / alpha<sup>2</sup>) *
394+
* ln( (rmsd<sub>max</sub><sup>2</sup> - rmsd<sub>0</sub><sup>2</sup>) /
395+
* (rmsd<sub>max</sub><sup>2</sup> - (rmsd<sub>ij</sub><sup>2</sup>) )
396+
* </pre>
385397
*
386398
* @param rmsdMat
387399
* RMSD matrix for all structure pairs (symmetric matrix)
400+
* @param alpha
401+
* change in CA positions introduced by a single AA substitution
402+
* (Grishin 1995: 1 A)
403+
* @param rmsdMax
404+
* estimated RMSD between proteins of the same fold when the
405+
* percentage of identity is infinitely low (the maximum allowed
406+
* RMSD of proteins with the same fold). (Grishin 1995: 5 A)
407+
* @param rmsd0
408+
* arithmetical mean of squares of the RMSD for identical
409+
* proteins (Grishin 1995: 0.4 A)
388410
* @return DistanceMatrix
389411
*/
390412
public static <C extends Sequence<D>, D extends Compound> DistanceMatrix structuralDistance(
391-
double[][] rmsdMat) {
392-
// TODO
393-
return null;
413+
double[][] rmsdMat, double alpha, double rmsdMax, double rmsd0) {
414+
415+
int n = rmsdMat.length;
416+
DistanceMatrix DM = new BasicSymmetricalDistanceMatrix(n);
417+
418+
// Transform raw RMSD into distances and create matrix
419+
for (int i = 0; i < n; i++) {
420+
for (int j = i; j < n; j++) {
421+
if (i == j)
422+
DM.setValue(i, j, 0.0);
423+
else {
424+
double d = (rmsdMax * rmsdMax)
425+
/ (alpha * alpha)
426+
* Math.log((rmsdMax * rmsdMax - rmsd0 * rmsd0)
427+
/ (rmsdMax * rmsdMax - rmsdMat[i][j]
428+
* rmsdMat[i][j]));
429+
DM.setValue(i, j, d);
430+
DM.setValue(j, i, d);
431+
}
432+
}
433+
}
434+
435+
return DM;
394436
}
395437

396438
/**

0 commit comments

Comments
 (0)