Skip to content

Commit 231a155

Browse files
committed
Rename the Tree Evaluator, specific for distance trees
It uses a LSE measure for the Tree error estimation
1 parent c485b29 commit 231a155

File tree

2 files changed

+149
-130
lines changed

2 files changed

+149
-130
lines changed

biojava-phylo/src/main/java/org/biojava/nbio/phylo/CheckTreeAccuracy.java

Lines changed: 0 additions & 130 deletions
This file was deleted.
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/*
2+
* BioJava development code
3+
*
4+
* This code may be freely distributed and modified under the
5+
* terms of the GNU Lesser General Public Licence. This should
6+
* be distributed with the code. If you do not have a copy,
7+
* see:
8+
*
9+
* http://www.gnu.org/copyleft/lesser.html
10+
*
11+
* Copyright for this code is held jointly by the individual
12+
* authors. These should be listed in @author doc comments.
13+
*
14+
* For more information on the BioJava project and its aims,
15+
* or to join the biojava-l mailing list, visit the home page
16+
* at:
17+
*
18+
* http://www.biojava.org/
19+
*
20+
*/
21+
/*
22+
* To change this template, choose Tools | Templates
23+
* and open the template in the editor.
24+
*/
25+
package org.biojava.nbio.phylo;
26+
27+
import org.forester.evoinference.matrix.distance.DistanceMatrix;
28+
import org.forester.phylogeny.Phylogeny;
29+
import org.forester.phylogeny.PhylogenyNode;
30+
import org.slf4j.Logger;
31+
import org.slf4j.LoggerFactory;
32+
33+
import java.util.HashMap;
34+
import java.util.HashSet;
35+
import java.util.List;
36+
import java.util.Set;
37+
38+
/**
39+
* Check the accuracy of a Distance Tree by least squares error (LSE) of the
40+
* Tree branch lengths and the original Distance Matrix.
41+
*
42+
* @author Scooter Willis
43+
* @author Aleix Lafita
44+
*
45+
*/
46+
public class DistanceTreeEvaluator {
47+
48+
private static final Logger logger = LoggerFactory
49+
.getLogger(DistanceTreeEvaluator.class);
50+
51+
/** Prevent instantiation */
52+
private DistanceTreeEvaluator() {
53+
}
54+
55+
/**
56+
* Evaluate the goodness of fit of a given tree to the original distance
57+
* matrix. The returned value is the coefficient of variation, i.e. the
58+
* square root of the LS error normalized by the mean.
59+
* <p>
60+
* This measure can also give an estimate of the quality of the distance
61+
* matrix, because a bad fit may mean that the distance is non-additive.
62+
*
63+
* @param tree
64+
* Phylogenetic Distance Tree to evaluate
65+
* @param matrix
66+
* Distance Matrix with the original distances
67+
* @return the square root of the average tree LS error normalized by the
68+
* average tree distance (coefficient of variation, CV).
69+
*/
70+
public static double evaluate(Phylogeny tree, DistanceMatrix matrix) {
71+
int numSequences = matrix.getSize();
72+
List<PhylogenyNode> externalNodes = tree.getExternalNodes();
73+
HashMap<String, PhylogenyNode> externalNodesHashMap = new HashMap<String, PhylogenyNode>();
74+
Set<PhylogenyNode> path = new HashSet<PhylogenyNode>();
75+
76+
for (PhylogenyNode node : externalNodes) {
77+
externalNodesHashMap.put(node.getName(), node);
78+
}
79+
int count = 0;
80+
double averageMatrixDistance = 0.0;
81+
double averageTreeDistance = 0.0;
82+
double averageTreeErrorDistance = 0.0;
83+
for (int row = 0; row < numSequences - 1; row++) {
84+
String nodeName1 = matrix.getIdentifier(row);
85+
PhylogenyNode node1 = externalNodesHashMap.get(nodeName1);
86+
markPathToRoot(node1, path);
87+
for (int col = row + 1; col < numSequences; col++) {
88+
count++;
89+
String nodeName2 = matrix.getIdentifier(col);
90+
PhylogenyNode node2 = externalNodesHashMap.get(nodeName2);
91+
double distance = matrix.getValue(col, row);
92+
averageMatrixDistance = averageMatrixDistance + distance;
93+
PhylogenyNode commonParent = findCommonParent(node2, path);
94+
if (commonParent != null) {
95+
double treeDistance = getNodeDistance(commonParent, node1)
96+
+ getNodeDistance(commonParent, node2);
97+
98+
averageTreeDistance += treeDistance;
99+
averageTreeErrorDistance += (distance - treeDistance)
100+
* (distance - treeDistance);
101+
logger.info("{} {} Distance: {}Tree: {} difference: {}",
102+
nodeName1, nodeName2, distance, treeDistance,
103+
Math.abs(distance - treeDistance));
104+
} else {
105+
logger.warn("Unable to find common parent with {} {}",
106+
node1, node2);
107+
}
108+
}
109+
path.clear();
110+
}
111+
averageMatrixDistance /= count;
112+
averageTreeDistance /= count;
113+
averageTreeErrorDistance /= count;
114+
115+
logger.info("Average matrix distance: {}", averageMatrixDistance);
116+
logger.info("Average tree distance: {}", averageTreeDistance);
117+
logger.info("Average LS error: {}", averageTreeErrorDistance);
118+
119+
return Math.sqrt((averageTreeErrorDistance)) / (averageMatrixDistance);
120+
}
121+
122+
private static double getNodeDistance(PhylogenyNode parentNode,
123+
PhylogenyNode childNode) {
124+
double distance = 0.0;
125+
while (childNode != parentNode) {
126+
distance = distance + childNode.getDistanceToParent();
127+
childNode = childNode.getParent();
128+
}
129+
130+
return distance;
131+
}
132+
133+
private static PhylogenyNode findCommonParent(PhylogenyNode node,
134+
Set<PhylogenyNode> path) {
135+
while (!path.contains(node)) {
136+
node = node.getParent();
137+
}
138+
return node;
139+
}
140+
141+
private static void markPathToRoot(PhylogenyNode node,
142+
Set<PhylogenyNode> path) {
143+
path.add(node);
144+
while (!node.isRoot()) {
145+
node = node.getParent();
146+
path.add(node);
147+
}
148+
}
149+
}

0 commit comments

Comments
 (0)