Skip to content

Commit 46435ec

Browse files
author
luke czapla
committed
All classes moved to have an analyze() method and toString() for printing
1 parent dda2351 commit 46435ec

File tree

4 files changed

+186
-24
lines changed

4 files changed

+186
-24
lines changed

biojava-structure/src/main/java/org/biojava/nbio/structure/basepairs/BasePairParameters.java

Lines changed: 74 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ public class BasePairParameters {
3636
// See URL http://ndbserver.rutgers.edu/ndbmodule/archives/reports/tsukuba/Table1.html
3737
// and the paper cited at the top of this class (also as Table 1).
3838
// These are hard-coded to avoid problems with resource paths.
39-
private static String[] standardBases = new String[] {
39+
public static String[] standardBases = new String[] {
4040
"SEQRES 1 A 1 A\n" +
4141
"ATOM 2 N9 A A 1 -1.291 4.498 0.000\n" +
4242
"ATOM 3 C8 A A 1 0.024 4.897 0.000\n" +
@@ -110,7 +110,9 @@ public class BasePairParameters {
110110
}
111111

112112
protected Structure structure;
113+
protected boolean canonical = true;
113114
protected boolean useRNA = false;
115+
protected boolean nonredundant = false;
114116
protected double[] pairParameters;
115117

116118
// this is the main data that you want to get back out from the procedure.
@@ -128,19 +130,50 @@ public class BasePairParameters {
128130
* @param useRNA whether to look for canonical RNA pairs. By default (false) it analyzes DNA.
129131
* @param removeDups whether to only look for base-pair parameters for each unique sequence in
130132
* the structure (if set to <i>true</i>)
133+
* @param canonical Whether to consider only Watson-Crick base pairs
131134
*/
132-
public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups) {
135+
public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups, boolean canonical) {
133136
this.structure = structure;
134137
this.useRNA = useRNA;
138+
this.canonical = canonical;
139+
this.nonredundant = removeDups;
140+
141+
}
142+
143+
public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups) {
144+
this(structure, useRNA, removeDups, false);
145+
}
146+
147+
public BasePairParameters(Structure structure, boolean useRNA) {
148+
this(structure, useRNA, false, false);
149+
}
150+
151+
/**
152+
* Constructor takes a Structure object, finds base pair and base-pair step parameters
153+
* for double-helical regions within the structure for only canonical DNA pairs.
154+
* @param structure The already-loaded structure to analyze.
155+
*/
156+
public BasePairParameters(Structure structure) {
157+
this(structure, false, false, true);
158+
}
159+
160+
161+
/**
162+
* This is the main function call to extract all step parameters, pairing parameters, and sequence
163+
* information from the Structure object provided to the constructor.
164+
* @return This same object with the populated data, convenient for output
165+
* (e.g. <i>log.info(new BasePairParameters(structure).analyze());</i>)
166+
*/
167+
public BasePairParameters analyze() {
135168
if (structure == null) {
136169
pairingParameters = null;
137170
stepParameters = null;
138-
return;
171+
return this;
139172
}
140-
List<Chain> nucleics = this.getNucleicChains(removeDups);
173+
List<Chain> nucleics = this.getNucleicChains(nonredundant);
141174
List<Group[]> pairs = this.findPairs(nucleics);
142-
pairingParameters = new double[pairs.size()][6];
143-
stepParameters = new double[pairs.size()][6];
175+
this.pairingParameters = new double[pairs.size()][6];
176+
this.stepParameters = new double[pairs.size()][6];
144177
Matrix4d lastStep;
145178
Matrix4d currentStep = null;
146179
for (int i = 0; i < pairs.size(); i++) {
@@ -154,20 +187,11 @@ public BasePairParameters(Structure structure, boolean useRNA, boolean removeDup
154187
double[] sparms = calculatetp(lastStep);
155188
for (int j = 0; j < 6; j++) stepParameters[i][j] = sparms[j];
156189
}
157-
; }
158-
190+
}
191+
return this;
159192
}
160193

161194

162-
/**
163-
* Constructor takes a Structure object, finds base pair and base-pair step parameters
164-
* for double-helical regions within the structure for only canonical DNA pairs.
165-
* @param structure The already-loaded structure to analyze.
166-
*/
167-
public BasePairParameters(Structure structure) {
168-
this(structure, false, false);
169-
}
170-
171195
/**
172196
* This reports all the pair parameters, in the order of:
173197
* buckle, propeller, opening (in degrees), shear, stagger, stretch (in Å).
@@ -190,7 +214,7 @@ public double[][] getStepParameters() {
190214
/**
191215
* This returns the primary strand's sequence where parameters were found.
192216
* There are spaces in the string anywhere there was a break in the helix or when
193-
* it goes from one helix to another helix in the structure. (the "step" is still returned!)
217+
* it goes from one helix to another helix in the structure. (the "step" is still returned)
194218
* @return String of primary sequence with spaces between gaps and new helices.
195219
*/
196220
public String getPairSequence() {
@@ -212,7 +236,7 @@ public List<Matrix4d> getReferenceFrames() {
212236

213237
/**
214238
* This reports all the nucleic acid chains and has an option to remove duplicates if you
215-
* are considering an analyze of only unique DNA or RNA helices in the Structure.
239+
* are considering an analysis of only unique DNA or RNA helices in the Structure.
216240
* @param removeDups If true, it will ignore duplicate chains
217241
* @return A list of all the nucleic acid chains in order of the Structure
218242
*/
@@ -236,7 +260,6 @@ public List<Chain> getNucleicChains(boolean removeDups) {
236260
return result;
237261
}
238262

239-
240263
/**
241264
* This performs a search for base pairs in the structure. The criteria are alignment of
242265
* sequences and the canonical base pairs of DNA and RNA.
@@ -309,6 +332,11 @@ public List<Group[]> findPairs(List<Chain> chains) {
309332
}
310333

311334

335+
/**
336+
* Calculate the central frame (4x4 transformation matrix) of a single base pair.
337+
* @param pair An array of the two groups that make a hypothetical pair
338+
* @return The middle frame of the center of the base-pair formed
339+
*/
312340
public Matrix4d basePairReferenceFrame(Group[] pair) {
313341
Integer type1 = map.get(pair[0].getPDBName());
314342
Integer type2 = map.get(pair[1].getPDBName());
@@ -407,6 +435,23 @@ public Matrix4d basePairReferenceFrame(Group[] pair) {
407435
}
408436

409437

438+
@Override
439+
public String toString() {
440+
if (getPairingParameters() == null) return "No data";
441+
StringBuilder result = new StringBuilder(10000);
442+
result.append(pairingParameters.length + " base pairs\n");
443+
result.append("bp: buckle propeller opening shear stretch stagger tilt roll twist shift slide rise\n");
444+
for (int i = 0; i < pairingParameters.length; i++) {
445+
result.append(pairingNames.get(i)+": ");
446+
for (int j = 0; j < 6; j++)
447+
result.append(String.format("%5.4f", pairingParameters[i][j]) + " ");
448+
for (int j = 0; j < 6; j++)
449+
result.append(String.format("%5.4f", stepParameters[i][j]) + " ");
450+
result.append("\n");
451+
}
452+
return result.toString();
453+
}
454+
410455

411456
/**
412457
* This method calculates pairing and step parameters from 4x4 transformation matrices
@@ -519,4 +564,13 @@ public static String longestCommonSubstring(String s1, String s2) {
519564
return s1.substring(start, (start + max));
520565
}
521566

567+
protected static boolean match(char a, char b, boolean RNA) {
568+
if (a == 'A' && b == 'T' && !RNA) return true;
569+
if (a == 'A' && b == 'U' && RNA) return true;
570+
if (a == 'T' && b == 'A' && !RNA) return true;
571+
if (a == 'U' && b == 'A' && RNA) return true;
572+
if (a == 'G' && b == 'C') return true;
573+
if (a == 'C' && b == 'G') return true;
574+
return false;
575+
}
522576
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package org.biojava.nbio.structure.basepairs;
2+
3+
import org.biojava.nbio.structure.Atom;
4+
import org.biojava.nbio.structure.Chain;
5+
import org.biojava.nbio.structure.Group;
6+
import org.biojava.nbio.structure.Structure;
7+
8+
import javax.vecmath.Matrix4d;
9+
import java.util.ArrayList;
10+
import java.util.List;
11+
12+
/**
13+
* Contributed to BioJava under its LGPL
14+
* This class allows for finding inter-strand base pairs that are not necessarily canonical Watson-Crick pairs.
15+
* The implementation of findPairs() differs from that of the base class.
16+
* Created by luke on 7/23/17.
17+
*/
18+
public class MismatchedBasePairParameters extends BasePairParameters {
19+
20+
public MismatchedBasePairParameters(Structure structure, boolean RNA, boolean removeDups, boolean canonical) {
21+
22+
super(structure, RNA, removeDups, canonical);
23+
24+
}
25+
26+
/**
27+
* This is an implementation for finding non-canonical base pairs when there may be missing or overhanging bases.
28+
* @param chains The list of chains already found to be nucleic acids
29+
* @return The list of the atom groups (residues) that are pairs, a Group[2] array
30+
*/
31+
@Override
32+
public List<Group[]> findPairs(List<Chain> chains) {
33+
List<Group[]> result = new ArrayList<>();
34+
boolean lastFoundPair = false;
35+
for (int i = 0; i < chains.size(); i++) {
36+
Chain c = chains.get(i);
37+
String sequence = c.getAtomSequence();
38+
for (int m = 0; m < sequence.length(); m++) {
39+
boolean foundPair = false;
40+
Integer type1, type2;
41+
for (int j = i + 1; j < chains.size() && !foundPair; j++) {
42+
Chain c2 = chains.get(j);
43+
if (j > i+1 && c.getAtomSequence().equals(c2.getAtomSequence()) && nonredundant) continue;
44+
String sequence2 = c2.getAtomSequence();
45+
for (int k = c2.getAtomSequence().length() - 1; k >= 0 && !foundPair; k--) {
46+
if (canonical && !BasePairParameters.match(sequence.charAt(m), sequence2.charAt(k), useRNA)) continue;
47+
Group g1 = c.getAtomGroup(m);
48+
Group g2 = c2.getAtomGroup(k);
49+
type1 = map.get(g1.getPDBName());
50+
type2 = map.get(g2.getPDBName());
51+
if (type1 == null || type2 == null) continue;
52+
Atom a1 = g1.getAtom("C1'");
53+
Atom a2 = g2.getAtom("C1'");
54+
if (a1 == null || a2 == null) continue;
55+
// C1'-C1' distance is one useful criteria
56+
if (Math.abs(a1.getCoordsAsPoint3d().distance(a2.getCoordsAsPoint3d()) - 10.0) > 3.0) continue;
57+
Group[] ga = new Group[]{g1, g2};
58+
Matrix4d data = basePairReferenceFrame(ga);
59+
// if the stagger is greater than 2 Å, it's not really paired.
60+
if (Math.abs(pairParameters[5]) > 2.0) continue;
61+
if (Math.abs(pairParameters[3]) > 5.0) continue;
62+
if (Math.abs(pairParameters[4]) > 5.0) continue;
63+
64+
// if the propeller is ridiculous it's also not that good of a pair.
65+
if (Math.abs(pairParameters[1]) > 60.0) {
66+
continue;
67+
}
68+
result.add(ga);
69+
pairingNames.add(useRNA ? baseListRNA[type1] + baseListRNA[type2] : baseListDNA[type1] + baseListDNA[type2]);
70+
foundPair = true;
71+
}
72+
if (!foundPair && lastFoundPair) {
73+
if (pairSequence.length() > 0 && pairSequence.charAt(pairSequence.length() - 1) != ' ')
74+
pairSequence += ' ';
75+
}
76+
if (foundPair) pairSequence += (c.getAtomSequence().charAt(i));
77+
lastFoundPair = foundPair;
78+
}
79+
}
80+
}
81+
return result;
82+
}
83+
84+
85+
}

biojava-structure/src/main/java/org/biojava/nbio/structure/basepairs/TertiaryBasePairParameters.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,24 @@
1010
import java.util.List;
1111

1212
/**
13-
* Created by luke on 7/22/17.
13+
* Contributed to BioJava under its LGPL
14+
* Created by luke czapla on 7/22/17.
15+
* This class also finds the base pairing and base-pair step parameters but has a broader definition
16+
* of a base pair so that non-canonical-WC base pairs will be detected and reported. This is useful
17+
* for RNA that has folded into different regions.
1418
*/
1519
public class TertiaryBasePairParameters extends BasePairParameters {
1620

1721
public TertiaryBasePairParameters(Structure structure, boolean RNA, boolean removeDups) {
1822
super(structure, RNA, removeDups);
1923
}
2024

25+
/**
26+
* This is an alternative implementation of findPairs() that looks for anything that would fit the
27+
* criteria for a base-pair, useful for the context of tertiary structure of RNA.
28+
* @param chains The list of chains already found to be nucleic acids
29+
* @return The list of the atom groups (residues) that are pairs, a Group[2] array
30+
*/
2131
@Override
2232
public List<Group[]> findPairs(List<Chain> chains) {
2333
List<Group[]> result = new ArrayList<>();

biojava-structure/src/test/java/org/biojava/nbio/structure/basepairs/TestBasePairParameters.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@ public void testBasePair() {
2727
assertEquals(1, 2);
2828
}
2929
BasePairParameters bp = new BasePairParameters(structure);
30+
bp.analyze();
3031
double[][] pairs = bp.getPairingParameters();
3132
double[][] steps = bp.getStepParameters();
3233
String sequence = bp.getPairSequence();
3334

34-
assertEquals(sequence.trim().length(), 147);
35+
assertEquals(bp.getPairingParameters().length, 147);
3536
// all of the comparison data below was produced by an external program, 3DNA.
3637
// next three in degrees: buckle, propeller, opening
3738
assertEquals(pairs[0][0], -3.796, 0.1);
@@ -44,7 +45,7 @@ public void testBasePair() {
4445
// next three in degrees: tilt, roll, twist
4546
assertEquals(steps[1][0], 2.354, 0.1);
4647
assertEquals(steps[1][1], 0.785, 0.1);
47-
assertEquals(steps[1][2], 32.522, 1.0);
48+
assertEquals(steps[1][2], 32.522, 0.5);
4849
// next three in Å, shift, slide, rise
4950
assertEquals(steps[1][3], -0.873, 0.01);
5051
assertEquals(steps[1][4], -0.607, 0.01);
@@ -57,9 +58,21 @@ public void testBasePair() {
5758
structure = null;
5859
assertEquals(1, 2);
5960
}
60-
bp = new TertiaryBasePairParameters(structure, true, false);
61+
bp = new TertiaryBasePairParameters(structure, true, false).analyze();
6162
assertEquals(9, bp.getPairingParameters().length);
6263

64+
try {
65+
structure = StructureIO.getStructure("1P71");
66+
} catch (IOException|StructureException e) {
67+
e.printStackTrace();
68+
structure = null;
69+
assertEquals(1, 2);
70+
}
71+
72+
bp = new MismatchedBasePairParameters(structure, false, false, false).analyze();
73+
assertEquals(17, bp.getPairingParameters().length);
74+
75+
6376
}
6477

6578
}

0 commit comments

Comments
 (0)