Skip to content

Commit 3b093b9

Browse files
authored
Merge pull request #600 from sbliven/fix557
Allow partial residue ranges (#557)
2 parents d347689 + 485e2c5 commit 3b093b9

File tree

6 files changed

+189
-85
lines changed

6 files changed

+189
-85
lines changed

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/StructureToolsTest.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,41 @@ public void testGetSubRangesExtended() throws StructureException {
391391

392392
assertEquals("Did not find the expected number of residues in "+range, 6, chain.getAtomLength() );
393393

394+
// partial ranges
395+
range = "A:-+1";
396+
substr = StructureTools.getSubRanges(structure2, range);
397+
assertEquals("Wrong number of chains in "+range, 1, substr.size());
398+
chain = substr.getChainByIndex(0);
399+
assertEquals("Did not find the expected number of residues in "+range, 4, chain.getAtomLength() );
400+
401+
range = "A:--1";
402+
substr = StructureTools.getSubRanges(structure2, range);
403+
assertEquals("Wrong number of chains in "+range, 1, substr.size());
404+
chain = substr.getChainByIndex(0);
405+
assertEquals("Did not find the expected number of residues in "+range, 3, chain.getAtomLength() );
406+
407+
range = "A:^-+1";
408+
substr = StructureTools.getSubRanges(structure2, range);
409+
assertEquals("Wrong number of chains in "+range, 1, substr.size());
410+
chain = substr.getChainByIndex(0);
411+
assertEquals("Did not find the expected number of residues in "+range, 4, chain.getAtomLength() );
412+
413+
range = "A:^-$";
414+
substr = StructureTools.getSubRanges(structure2, range);
415+
assertEquals("Wrong number of chains in "+range, 1, substr.getPolyChains().size());
416+
chain = substr.getPolyChains().get(0);
417+
418+
range = "A:400-";
419+
substr = StructureTools.getSubRanges(structure2, range);
420+
assertEquals("Wrong number of chains in "+range, 1, substr.size());
421+
chain = substr.getChainByIndex(0);
422+
assertEquals("Did not find the expected number of residues in "+range, 6, chain.getAtomLength() );
423+
424+
range = "A:400-$";
425+
substr = StructureTools.getSubRanges(structure2, range);
426+
assertEquals("Wrong number of chains in "+range, 1, substr.size());
427+
chain = substr.getChainByIndex(0);
428+
assertEquals("Did not find the expected number of residues in "+range, 6, chain.getAtomLength() );
394429

395430
// whitespace
396431
range = "A:3-7, B:8-12";

biojava-structure/src/main/java/org/biojava/nbio/structure/Chain.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,8 @@ public interface Chain {
165165
/**
166166
* Get all groups that are located between two PDB residue numbers.
167167
*
168-
* @param pdbresnumStart PDB residue number of start
169-
* @param pdbresnumEnd PDB residue number of end
168+
* @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start.
169+
* @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end.
170170
* @return Groups in between; throws a StructureException if either start or end cannot be found.
171171
* @throws StructureException
172172
*/
@@ -179,8 +179,8 @@ public interface Chain {
179179
* of groups as specified by the DBREF records - these frequently are rather inaccurate.
180180
*
181181
*
182-
* @param pdbresnumStart PDB residue number of start
183-
* @param pdbresnumEnd PDB residue number of end
182+
* @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start.
183+
* @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end.
184184
* @param ignoreMissing ignore missing groups in this range.
185185
* @return Groups in between; throws a StructureException if either start or end cannot be found.
186186
* @throws StructureException

biojava-structure/src/main/java/org/biojava/nbio/structure/ChainImpl.java

Lines changed: 32 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -340,58 +340,60 @@ public void setAtomGroups(List<Group> groups){
340340
}
341341

342342
@Override
343-
@Deprecated // TODO dmyersturnbull: why is this deprecated if it's declared in Chain?
344343
public Group[] getGroupsByPDB(ResidueNumber start, ResidueNumber end, boolean ignoreMissing)
345344
throws StructureException {
346-
347-
if (! ignoreMissing )
348-
return getGroupsByPDB(start, end);
345+
// Short-circuit: with no start and no end, include all groups
346+
if(start == null && end == null) {
347+
return groups.toArray(new Group[groups.size()]);
348+
}
349349

350350

351351
List<Group> retlst = new ArrayList<>();
352352

353-
String pdbresnumStart = start.toString();
354-
String pdbresnumEnd = end.toString();
355-
356-
int startPos = start.getSeqNum();
357-
int endPos = end.getSeqNum();
358-
359-
boolean adding = false;
360-
boolean foundStart = false;
353+
boolean adding, foundStart;
354+
if( start == null ) {
355+
// start with first group
356+
adding = true;
357+
foundStart = true;
358+
} else {
359+
adding = false;
360+
foundStart = false;
361+
}
361362

363+
362364
for (Group g: groups){
363365

364-
if ( g.getResidueNumber().toString().equals(pdbresnumStart)) {
366+
// Check for start
367+
if (!adding && start.equalsPositional(g.getResidueNumber())) {
365368
adding = true;
366369
foundStart = true;
367370
}
368371

369-
if ( ! (foundStart && adding) ) {
370-
372+
// Check if past start
373+
if ( ignoreMissing && ! (foundStart && adding) ) {
374+
ResidueNumber pos = g.getResidueNumber();
371375

372-
int pos = g.getResidueNumber().getSeqNum();
373-
374-
if ( pos >= startPos) {
376+
if ( start != null && start.compareToPositional(pos) <= 0) {
375377
foundStart = true;
376378
adding = true;
377379
}
378-
379-
380380
}
381381

382382
if ( adding)
383383
retlst.add(g);
384384

385-
if ( g.getResidueNumber().toString().equals(pdbresnumEnd)) {
385+
// check for end
386+
if ( end != null && end.equalsPositional(g.getResidueNumber())) {
386387
if ( ! adding)
387-
throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + authId);
388+
throw new StructureException("did not find start PDB residue number " + start + " in chain " + authId);
388389
adding = false;
389390
break;
390391
}
391-
if (adding){
392+
// check if past end
393+
if ( ignoreMissing && adding && end != null){
392394

393-
int pos = g.getResidueNumber().getSeqNum();
394-
if (pos >= endPos) {
395+
ResidueNumber pos = g.getResidueNumber();
396+
if ( end.compareToPositional(pos) <= 0) {
395397
adding = false;
396398
break;
397399
}
@@ -400,7 +402,10 @@ public Group[] getGroupsByPDB(ResidueNumber start, ResidueNumber end, boolean ig
400402
}
401403

402404
if ( ! foundStart){
403-
throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + authId);
405+
throw new StructureException("did not find start PDB residue number " + start + " in chain " + authId);
406+
}
407+
if ( end != null && adding && !ignoreMissing) {
408+
throw new StructureException("did not find end PDB residue number " + end + " in chain " + authId);
404409
}
405410

406411

@@ -432,42 +437,7 @@ public Group getGroupByPDB(ResidueNumber resNum) throws StructureException {
432437
@Override
433438
public Group[] getGroupsByPDB(ResidueNumber start, ResidueNumber end)
434439
throws StructureException {
435-
436-
String pdbresnumStart = start.toString();
437-
String pdbresnumEnd = end.toString();
438-
439-
List<Group> retlst = new ArrayList<>();
440-
441-
Iterator<Group> iter = groups.iterator();
442-
boolean adding = false;
443-
boolean foundStart = false;
444-
445-
while ( iter.hasNext()){
446-
Group g = iter.next();
447-
if ( g.getResidueNumber().toString().equals(pdbresnumStart)) {
448-
adding = true;
449-
foundStart = true;
450-
}
451-
452-
if ( adding)
453-
retlst.add(g);
454-
455-
if ( g.getResidueNumber().toString().equals(pdbresnumEnd)) {
456-
if ( ! adding)
457-
throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + authId);
458-
adding = false;
459-
break;
460-
}
461-
}
462-
463-
if ( ! foundStart){
464-
throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + authId);
465-
}
466-
if ( adding) {
467-
throw new StructureException("did not find end PDB residue number " + pdbresnumEnd + " in chain " + authId);
468-
}
469-
470-
return retlst.toArray(new Group[retlst.size()] );
440+
return getGroupsByPDB(start, end, false);
471441
}
472442

473443

biojava-structure/src/main/java/org/biojava/nbio/structure/ResidueNumber.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,35 @@ public boolean equals(Object obj) {
111111

112112
return true;
113113
}
114+
115+
/**
116+
* Check if the seqNum and insertion code are equivalent,
117+
* ignoring the chain
118+
* @param obj
119+
* @return
120+
*/
121+
public boolean equalsPositional(Object obj) {
122+
if (this == obj)
123+
return true;
124+
if (obj == null)
125+
return false;
126+
if (getClass() != obj.getClass())
127+
return false;
128+
ResidueNumber other = (ResidueNumber) obj;
129+
if (insCode == null) {
130+
if (other.insCode != null)
131+
return false;
132+
} else if (!insCode.equals(other.insCode))
133+
return false;
134+
if (seqNum == null) {
135+
if (other.seqNum != null)
136+
return false;
137+
} else if (!seqNum.equals(other.seqNum))
138+
return false;
139+
140+
return true;
141+
142+
}
114143

115144
@Override
116145
public int hashCode() {
@@ -192,6 +221,9 @@ else if ( icode.length() > 0)
192221
}
193222

194223

224+
/**
225+
* Compare residue numbers by chain, sequence number, and insertion code
226+
*/
195227
@Override
196228
public int compareTo(ResidueNumber other) {
197229

@@ -205,6 +237,16 @@ public int compareTo(ResidueNumber other) {
205237
return -1;
206238
}
207239

240+
return compareToPositional(other);
241+
}
242+
243+
/**
244+
* Compare residue numbers by sequence number and insertion code,
245+
* ignoring the chain
246+
* @param other
247+
* @return
248+
*/
249+
public int compareToPositional(ResidueNumber other) {
208250
// sequence number
209251
if (seqNum != null && other.seqNum != null) {
210252
if (!seqNum.equals(other.seqNum)) return seqNum.compareTo(other.seqNum);

biojava-structure/src/main/java/org/biojava/nbio/structure/ResidueRange.java

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@
2929
import java.util.regex.Pattern;
3030

3131
/**
32-
* A chainName, a start residue, and an end residue.
32+
* A chainName, a start residue, and an end residue. The chainName is matched
33+
* to {@link Chain#getName()}, so for mmCIF files it indicates the authorId
34+
* rather than the asymId.
3335
*
3436
* Chain may be null when referencing a single-chainName structure; for multi-chainName
3537
* structures omitting the chainName is an error. Start and/or end may also be null,
@@ -50,10 +52,10 @@ public class ResidueRange {
5052
"(?:" + //begin range, this is a "non-capturing group"
5153
"(?::|_|:$|_$|$)" + //colon or underscore, could be at the end of a line, another non-capt. group.
5254
"(?:"+ // another non capturing group for the residue range
53-
"([-+]?[0-9]+[A-Za-z]?)" + // first residue
55+
"([-+]?[0-9]+[A-Za-z]?|\\^)?" + // first residue
5456
"(?:" +
55-
"\\s*-\\s*" + // -
56-
"([-+]?[0-9]+[A-Za-z]?)" + // second residue
57+
"\\s*(-)\\s*" + // hyphen indicates a range was intended
58+
"([-+]?[0-9]+[A-Za-z]?|\\$)?" + // second residue
5759
")?+"+
5860
")?+"+
5961
")?" + //end range
@@ -74,10 +76,12 @@ public class ResidueRange {
7476
*
7577
* <p>Examples:
7678
* <ul>
77-
* <li><code>A.5-100</code>
79+
* <li><code>A:5-100</code>
7880
* <li><code>A_5-100</code>
7981
* <li><code>A_-5</code>
80-
* <li><code>A.-12I-+12I
82+
* <li><code>A:-12I-+12I</code>
83+
* <li><code>A:^-$</code>
84+
* </ul>
8185
*
8286
* @param s residue string to parse
8387
* @return The unique ResidueRange corresponding to {@code s}
@@ -90,16 +94,22 @@ public static ResidueRange parse(String s) {
9094
try {
9195
chain = matcher.group(1);
9296
if (matcher.group(2) != null) {
93-
start = ResidueNumber.fromString(matcher.group(2));
94-
start.setChainName(chain);
95-
if(matcher.group(3) == null) {
96-
// single-residue range
97-
end = start;
98-
} else {
99-
end = ResidueNumber.fromString(matcher.group(3));
100-
end.setChainName(chain);
97+
// ^ indicates first res (start==null)
98+
if(!"^".equals(matcher.group(2)) ) {
99+
start = ResidueNumber.fromString(matcher.group(2));
100+
start.setChainName(chain);
101101
}
102102
}
103+
if(matcher.group(3) == null) {
104+
// single-residue range
105+
end = start;
106+
} else
107+
// $ indicates last res (end==null)
108+
if( matcher.group(4) != null && !"$".equals(matcher.group(4)) ){
109+
end = ResidueNumber.fromString(matcher.group(4));
110+
end.setChainName(chain);
111+
}
112+
103113
return new ResidueRange(chain, start, end);
104114
} catch (IllegalStateException e) {
105115
throw new IllegalArgumentException("Range " + s + " was not valid", e);

0 commit comments

Comments
 (0)