Skip to content

Commit f57ede4

Browse files
committed
Method for percentage of identity of a sequence pair biojava#491
1 parent 5264850 commit f57ede4

File tree

2 files changed

+34
-1
lines changed

2 files changed

+34
-1
lines changed

biojava-core/src/main/java/org/biojava/nbio/core/alignment/SimpleAlignedSequence.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ public class SimpleAlignedSequence<S extends Sequence<C>, C extends Compound> im
5959

6060
// cached (lazily initialized)
6161
private int numGaps = -1;
62+
private int numGapPositions = -1;
6263
private int[] alignmentFromSequence, sequenceFromAlignment;
6364

6465
/**
@@ -167,6 +168,7 @@ public Location getLocationInAlignment() {
167168
public int getNumGaps() {
168169
if (numGaps == -1) {
169170
numGaps = 0;
171+
numGapPositions = 0;
170172
C cGap = getCompoundSet().getCompoundForString(gap);
171173
boolean inGap = false;
172174
for (C compound : getAsList()) {
@@ -175,6 +177,7 @@ public int getNumGaps() {
175177
numGaps++;
176178
inGap = true;
177179
}
180+
numGapPositions++;
178181
} else {
179182
inGap = false;
180183
}
@@ -382,4 +385,18 @@ private void setLocation(List<Step> steps) {
382385
public SequenceView<C> getInverse() {
383386
throw new UnsupportedOperationException("Not supported yet.");
384387
}
388+
389+
@Override
390+
public int getNumGapPositions() {
391+
if (numGapPositions == -1)
392+
getNumGaps();
393+
return numGapPositions;
394+
}
395+
396+
@Override
397+
public double getCoverage() {
398+
399+
double coverage = getLength() - getNumGapPositions();
400+
return coverage / getOriginalSequence().getLength();
401+
}
385402
}

biojava-core/src/main/java/org/biojava/nbio/core/alignment/template/AlignedSequence.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,28 @@ enum Step { COMPOUND, GAP }
7474
Location getLocationInAlignment();
7575

7676
/**
77-
* Returns number of gaps in the sequence. This could be determined from the {@link Location} information or from
77+
* Returns number of gaps (gap openings) in the sequence. This could be determined from the {@link Location} information or from
7878
* gap {@link Compound}s, which may not necessarily result in the same number.
7979
*
8080
* @return number of gaps in the sequence
8181
*/
8282
int getNumGaps();
83+
84+
/**
85+
* Returns number of gap positions (gap openings and extensions) in the sequence. This could be determined from the {@link Location}
86+
* information or from gap {@link Compound}s, which may not necessarily result in the same number.
87+
*
88+
* @return number of gap positions in the sequence
89+
*/
90+
int getNumGapPositions();
91+
92+
/**
93+
* Returns the coverage, as a fraction between 0 and 1, of this {@link AlignedSequence} with respect to the original sequence.
94+
* This is equivalent to ({@link #getLength()} - {@link #getNumGapPositions()}) / getOriginalSequence().getLength().
95+
*
96+
* @return coverage of the original sequence by the aligned sequence
97+
*/
98+
double getCoverage();
8399

84100
/**
85101
* Returns the original {@link Sequence} before alignment.

0 commit comments

Comments
 (0)