|
5 | 5 | import org.biojava.nbio.alignment.Alignments; |
6 | 6 | import org.biojava.nbio.alignment.Alignments.PairwiseSequenceScorerType; |
7 | 7 | import org.biojava.nbio.alignment.SimpleGapPenalty; |
| 8 | +import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper; |
8 | 9 | import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; |
9 | 10 | import org.biojava.nbio.core.sequence.MultipleSequenceAlignment; |
| 11 | +import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; |
10 | 12 | import org.biojava.nbio.core.sequence.template.Compound; |
11 | 13 | import org.biojava.nbio.core.sequence.template.Sequence; |
12 | 14 | import org.biojava.nbio.core.util.ConcurrencyTools; |
@@ -227,7 +229,7 @@ public static <C extends Sequence<D>, D extends Compound> DistanceMatrix fractio |
227 | 229 | msa.getAlignedSequences(), |
228 | 230 | PairwiseSequenceScorerType.GLOBAL_SIMILARITIES, |
229 | 231 | new SimpleGapPenalty(0, 0), M); |
230 | | - |
| 232 | + |
231 | 233 | ConcurrencyTools.shutdown(); |
232 | 234 |
|
233 | 235 | // The maximum score that can be obtained for this alignment |
@@ -373,24 +375,64 @@ public static <C extends Sequence<D>, D extends Compound> DistanceMatrix dissimi |
373 | 375 | */ |
374 | 376 | public static <C extends Sequence<D>, D extends Compound> DistanceMatrix pamDistance( |
375 | 377 | MultipleSequenceAlignment<C, D> msa) { |
376 | | - // TODO |
| 378 | + |
| 379 | + // Need to import PAM1 matrix to biojava TODO |
| 380 | + SubstitutionMatrix<AminoAcidCompound> PAM1 = SubstitutionMatrixHelper |
| 381 | + .getPAM250(); |
| 382 | + |
377 | 383 | return null; |
378 | 384 | } |
379 | 385 |
|
380 | 386 | /** |
381 | 387 | * The structural distance (d<sub>S</sub>) uses the structural similarity |
382 | 388 | * (or dissimilarity) from a the structural alignment of two protein |
383 | | - * strutures. It is based on the diffusive model for protein fold evolution. |
384 | | - * The structural deviations are captured as RMS deviations. |
| 389 | + * strutures. It is based on the diffusive model for protein fold evolution |
| 390 | + * (Grishin 1995). The structural deviations are captured as RMS deviations. |
| 391 | + * |
| 392 | + * <pre> |
| 393 | + * d<sub>Sij</sub> = (rmsd<sub>max</sub><sup>2</sup> / alpha<sup>2</sup>) * |
| 394 | + * ln( (rmsd<sub>max</sub><sup>2</sup> - rmsd<sub>0</sub><sup>2</sup>) / |
| 395 | + * (rmsd<sub>max</sub><sup>2</sup> - (rmsd<sub>ij</sub><sup>2</sup>) ) |
| 396 | + * </pre> |
385 | 397 | * |
386 | 398 | * @param rmsdMat |
387 | 399 | * RMSD matrix for all structure pairs (symmetric matrix) |
| 400 | + * @param alpha |
| 401 | + * change in CA positions introduced by a single AA substitution |
| 402 | + * (Grishin 1995: 1 A) |
| 403 | + * @param rmsdMax |
| 404 | + * estimated RMSD between proteins of the same fold when the |
| 405 | + * percentage of identity is infinitely low (the maximum allowed |
| 406 | + * RMSD of proteins with the same fold). (Grishin 1995: 5 A) |
| 407 | + * @param rmsd0 |
| 408 | + * arithmetical mean of squares of the RMSD for identical |
| 409 | + * proteins (Grishin 1995: 0.4 A) |
388 | 410 | * @return DistanceMatrix |
389 | 411 | */ |
390 | 412 | public static <C extends Sequence<D>, D extends Compound> DistanceMatrix structuralDistance( |
391 | | - double[][] rmsdMat) { |
392 | | - // TODO |
393 | | - return null; |
| 413 | + double[][] rmsdMat, double alpha, double rmsdMax, double rmsd0) { |
| 414 | + |
| 415 | + int n = rmsdMat.length; |
| 416 | + DistanceMatrix DM = new BasicSymmetricalDistanceMatrix(n); |
| 417 | + |
| 418 | + // Transform raw RMSD into distances and create matrix |
| 419 | + for (int i = 0; i < n; i++) { |
| 420 | + for (int j = i; j < n; j++) { |
| 421 | + if (i == j) |
| 422 | + DM.setValue(i, j, 0.0); |
| 423 | + else { |
| 424 | + double d = (rmsdMax * rmsdMax) |
| 425 | + / (alpha * alpha) |
| 426 | + * Math.log((rmsdMax * rmsdMax - rmsd0 * rmsd0) |
| 427 | + / (rmsdMax * rmsdMax - rmsdMat[i][j] |
| 428 | + * rmsdMat[i][j])); |
| 429 | + DM.setValue(i, j, d); |
| 430 | + DM.setValue(j, i, d); |
| 431 | + } |
| 432 | + } |
| 433 | + } |
| 434 | + |
| 435 | + return DM; |
394 | 436 | } |
395 | 437 |
|
396 | 438 | /** |
|
0 commit comments