Skip to content

Commit 1653b63

Browse files
authored
Merge pull request #682 from valasatava/bugfixes-4.2
Cherry-pick the c5fa135 commit
2 parents 9291fab + 450b43c commit 1653b63

File tree

2 files changed

+123
-31
lines changed

2 files changed

+123
-31
lines changed

biojava-core/src/main/java/org/biojava/nbio/core/alignment/SimpleAlignedSequence.java

Lines changed: 95 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.biojava.nbio.core.sequence.location.template.Location;
3232
import org.biojava.nbio.core.sequence.location.template.Point;
3333
import org.biojava.nbio.core.sequence.template.*;
34+
import org.biojava.nbio.core.util.Equals;
3435

3536
import java.io.Serializable;
3637
import java.util.ArrayList;
@@ -59,7 +60,10 @@ public class SimpleAlignedSequence<S extends Sequence<C>, C extends Compound> im
5960

6061
// cached (lazily initialized)
6162
private int numGaps = -1;
62-
private int[] alignmentFromSequence, sequenceFromAlignment;
63+
private int numGapPositions = -1;
64+
65+
private int[] alignmentFromSequence;
66+
private int[] sequenceFromAlignment;
6367

6468
/**
6569
* Creates an {@link AlignedSequence} for the given {@link Sequence} in a global alignment.
@@ -131,25 +135,35 @@ public void clearCache() {
131135
sequenceFromAlignment = null;
132136
}
133137

134-
@Override
135-
public int getAlignmentIndexAt(int sequenceIndex) {
136-
if (alignmentFromSequence == null) {
137-
alignmentFromSequence = new int[original.getLength()];
138-
int s = 1, a = 1;
139-
for (int i = 0; i < numBefore; i++, s++) {
140-
alignmentFromSequence[s - 1] = a;
141-
}
142-
for (; s <= alignmentFromSequence.length && a <= length; s++, a++) {
143-
while (a <= length && isGap(a)) {
144-
a++;
145-
}
146-
alignmentFromSequence[s - 1] = a;
147-
}
148-
a--;
149-
for (int i = 0; i < numAfter; i++, s++) {
150-
alignmentFromSequence[s - 1] = a;
138+
private void setAlignmentFromSequence() {
139+
alignmentFromSequence = new int[original.getLength()];
140+
int s = 1, a = 1;
141+
for (int i = 0; i < numBefore; i++, s++) {
142+
alignmentFromSequence[s - 1] = a;
143+
}
144+
for (; s <= alignmentFromSequence.length && a <= length; s++, a++) {
145+
while (a <= length && isGap(a)) {
146+
a++;
151147
}
148+
alignmentFromSequence[s - 1] = a;
149+
}
150+
a--;
151+
for (int i = 0; i < numAfter; i++, s++) {
152+
alignmentFromSequence[s - 1] = a;
152153
}
154+
}
155+
156+
@Override
157+
public int[] getAlignmentFromSequence() {
158+
if (alignmentFromSequence == null)
159+
setAlignmentFromSequence();
160+
return alignmentFromSequence;
161+
}
162+
163+
@Override
164+
public int getAlignmentIndexAt(int sequenceIndex) {
165+
if (alignmentFromSequence == null)
166+
setAlignmentFromSequence();
153167
return alignmentFromSequence[sequenceIndex - 1];
154168
}
155169

@@ -167,6 +181,7 @@ public Location getLocationInAlignment() {
167181
public int getNumGaps() {
168182
if (numGaps == -1) {
169183
numGaps = 0;
184+
numGapPositions = 0;
170185
C cGap = getCompoundSet().getCompoundForString(gap);
171186
boolean inGap = false;
172187
for (C compound : getAsList()) {
@@ -175,6 +190,7 @@ public int getNumGaps() {
175190
numGaps++;
176191
inGap = true;
177192
}
193+
numGapPositions++;
178194
} else {
179195
inGap = false;
180196
}
@@ -194,21 +210,31 @@ public int getOverlapCount() {
194210
return 1;
195211
}
196212

197-
@Override
198-
public int getSequenceIndexAt(int alignmentIndex) {
199-
if (sequenceFromAlignment == null) {
200-
sequenceFromAlignment = new int[length];
201-
int a = 1, s = numBefore + 1;
202-
for (int i = 0; i < getStart().getPosition(); i++, a++) {
203-
sequenceFromAlignment[a - 1] = s;
204-
}
205-
for (; a <= length; a++) {
206-
if (!isGap(a)) {
207-
s++;
208-
}
209-
sequenceFromAlignment[a - 1] = s;
213+
private void setSequenceFromAlignment() {
214+
sequenceFromAlignment = new int[length];
215+
int a = 1, s = numBefore + 1;
216+
for (int i = 0; i < getStart().getPosition(); i++, a++) {
217+
sequenceFromAlignment[a - 1] = s;
218+
}
219+
for (; a <= length; a++) {
220+
if (!isGap(a)) {
221+
s++;
210222
}
223+
sequenceFromAlignment[a - 1] = s;
211224
}
225+
}
226+
227+
@Override
228+
public int[] getSequenceFromAlignment() {
229+
if (sequenceFromAlignment == null)
230+
setSequenceFromAlignment();
231+
return sequenceFromAlignment;
232+
}
233+
234+
@Override
235+
public int getSequenceIndexAt(int alignmentIndex) {
236+
if (sequenceFromAlignment == null)
237+
setSequenceFromAlignment();
212238
return sequenceFromAlignment[alignmentIndex - 1];
213239
}
214240

@@ -266,6 +292,30 @@ public List<C> getAsList() {
266292
return compounds;
267293
}
268294

295+
@Override
296+
public boolean equals(Object o){
297+
298+
if(! Equals.classEqual(this, o)) {
299+
return false;
300+
}
301+
302+
Sequence<C> other = (Sequence<C>)o;
303+
if ( original.getAsList().size() != other.getAsList().size())
304+
return false;
305+
306+
for ( int i = 0 ; i< original.getAsList().size() ; i++){
307+
if ( ! original.getAsList().get(i).equalsIgnoreCase(other.getAsList().get(i)))
308+
return false;
309+
}
310+
return true;
311+
}
312+
313+
@Override
314+
public int hashCode(){
315+
String s = getSequenceAsString();
316+
return s.hashCode();
317+
}
318+
269319
@Override
270320
public C getCompoundAt(int alignmentIndex) {
271321
return alignmentIndex >= 1 && alignmentIndex <= length && isGap(alignmentIndex) ?
@@ -382,4 +432,18 @@ private void setLocation(List<Step> steps) {
382432
public SequenceView<C> getInverse() {
383433
throw new UnsupportedOperationException("Not supported yet.");
384434
}
435+
436+
@Override
437+
public int getNumGapPositions() {
438+
if (numGapPositions == -1)
439+
getNumGaps();
440+
return numGapPositions;
441+
}
442+
443+
@Override
444+
public double getCoverage() {
445+
446+
double coverage = getLength() - getNumGapPositions();
447+
return coverage / getOriginalSequence().getLength();
448+
}
385449
}

biojava-core/src/main/java/org/biojava/nbio/core/alignment/template/AlignedSequence.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,18 @@ enum Step { COMPOUND, GAP }
4747
*/
4848
void clearCache();
4949

50+
/** Returns the alignment column index for each position of the original sequence.
51+
*
52+
* @return array of the alignment column indices, one entry per position of the original sequence
53+
*/
54+
int[] getAlignmentFromSequence();
55+
56+
/** Returns the sequence position corresponding to each alignment column index.
57+
*
58+
* @return array of the sequence positions
59+
*/
60+
int[] getSequenceFromAlignment();
61+
5062
/**
5163
* Returns the column index within an alignment corresponding to the given index in the original {@link Sequence}.
5264
* Both indices are 1-indexed and inclusive.
@@ -130,4 +142,20 @@ enum Step { COMPOUND, GAP }
130142
*/
131143
boolean isGap(int alignmentIndex);
132144

145+
/**
146+
* Returns the number of gap positions (gap openings and extensions) in the sequence. This could be determined from the {@link Location}
147+
* information or from gap {@link Compound}s, which may not necessarily result in the same number.
148+
*
149+
* @return number of gap positions in the sequence
150+
*/
151+
int getNumGapPositions();
152+
153+
/**
154+
* Returns the coverage, as a fraction between 0 and 1, of this {@link AlignedSequence} with respect to the original sequence.
155+
* This is equivalent to ({@link #getLength()} - {@link #getNumGapPositions()}) / {@code getOriginalSequence().getLength()}.
156+
*
157+
* @return coverage of the original sequence by the aligned sequence
158+
*/
159+
double getCoverage();
160+
133161
}

0 commit comments

Comments
 (0)