Skip to content

Commit 88a2205

Browse files
author
luke czapla
committed
finding base pairs between the same strand for hairpins
1 parent f0cdbac commit 88a2205

File tree

3 files changed

+103
-20
lines changed

3 files changed

+103
-20
lines changed

biojava-structure/src/main/java/org/biojava/nbio/structure/basepairs/BasePairParameters.java

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,23 @@ public class BasePairParameters {
7474
"ATOM 6 C4 C A 1 0.837 2.868 0.000\n" +
7575
"ATOM 8 C5 C A 1 1.056 4.275 0.000\n" +
7676
"ATOM 9 C6 C A 1 -0.023 5.068 0.000\n" +
77-
"END"
77+
"END",
78+
"SEQRES 1 A 1 U\n" +
79+
"ATOM 2 N1 U A 1 -1.284 4.500 0.000\n" +
80+
"ATOM 3 C2 U A 1 -1.462 3.131 0.000\n" +
81+
"ATOM 5 N3 U A 1 -0.302 2.397 0.000\n" +
82+
"ATOM 6 C4 U A 1 0.989 2.884 0.000\n" +
83+
"ATOM 8 C5 U A 1 1.089 4.311 0.000\n" +
84+
"ATOM 9 C6 U A 1 -0.024 5.053 0.000\n"
7885
};
7986

8087
// this is also hard-coded data about standard WC base pairs for both DNA and RNA
81-
//private static String[] baseListDNA = {"A", "G", "T", "C"};
82-
//private static String[] baseListRNA = {"A", "G", "U", "C"};
83-
private static Map<String, Integer> map;
84-
private static Map<Integer, List<String>> ringMap;
88+
protected static String[] baseListDNA = {"A", "G", "T", "C"};
89+
protected static String[] baseListRNA = {"A", "G", "U", "C"};
90+
protected static Map<String, Integer> map;
91+
// private static List<String> RNAspecific = Arrays.asList("U", "URA"),
92+
// DNAspecific = Arrays.asList("DC", "C", "CYT");
93+
protected static Map<Integer, List<String>> ringMap;
8594
static {
8695
map = new HashMap<>();
8796
map.put("DA", 0); map.put("ADE", 0); map.put("A", 0);
@@ -100,14 +109,15 @@ public class BasePairParameters {
100109
ringMap.put(3, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1"));
101110
}
102111

103-
private Structure structure;
104-
private boolean useRNA = false;
105-
private double[] pairParameters;
112+
protected Structure structure;
113+
protected boolean useRNA = false;
114+
protected double[] pairParameters;
106115

107116
// this is the main data that you want to get back out from the procedure.
108-
private String pairSequence = "";
109-
private double[][] pairingParameters;
110-
private double[][] stepParameters;
117+
protected String pairSequence = "";
118+
protected double[][] pairingParameters;
119+
protected double[][] stepParameters;
120+
protected List<String> pairingNames = new ArrayList<>();
111121

112122

113123
/**
@@ -185,6 +195,15 @@ public String getPairSequence() {
185195
return pairSequence;
186196
}
187197

198+
/**
199+
* This returns the names of the pairs in terms of A, G, T/U, and C for each base pair group in the
200+
* list. The first character is the leading strand base and the second character is the complementary base
201+
* @return
202+
*/
203+
public List<String> getPairingNames() {
204+
return pairingNames;
205+
}
206+
188207
/**
189208
* This reports all the nucleic acid chains and has an option to remove duplicates if you
190209
* are considering an analysis of only unique DNA or RNA helices in the Structure.
@@ -258,7 +277,7 @@ public List<Group[]> findPairs(List<Chain> chains) {
258277
double distance = Math.sqrt(dx*dx+dy*dy+dz*dz);
259278
//log.info("C8-C6 Distance (Å): " + distance);
260279
// could be a base pair
261-
if (Math.abs(distance-10.0) < 2.5) {
280+
if (Math.abs(distance-10.0) < 5.0) {
262281
boolean valid = true;
263282
for (String atomname : ringMap.get(type1)) {
264283
Atom a = g1.getAtom(atomname);
@@ -269,11 +288,8 @@ public List<Group[]> findPairs(List<Chain> chains) {
269288
if (a == null) valid = false;
270289
}
271290
if (valid) {
272-
Group g3 = null;
273-
Group g4 = null;
274-
if (k + 1 < match.length()) g3 = c.getSeqResGroup(index1 + k + 1);
275-
if (k != 0) g4 = c.getSeqResGroup(index1 + k - 1);
276-
result.add(new Group[]{g1, g2, g3, g4});
291+
result.add(new Group[]{g1, g2});
292+
pairingNames.add((useRNA ? baseListRNA[type1]+baseListRNA[type2] : baseListDNA[type1]+baseListDNA[type2]));
277293
pairSequence += c.getSeqResSequence().charAt(index1 + k);
278294
} else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
279295
} else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
@@ -424,8 +440,8 @@ public static double[] calculatetp(Matrix4d input) {
424440
public static char complementBase(char base, boolean RNA) {
425441
if (base == 'A' && RNA) return 'U';
426442
if (base == 'A') return 'T';
427-
if (base == 'T') return 'A';
428-
if (base == 'U') return 'A';
443+
if (base == 'T' && !RNA) return 'A';
444+
if (base == 'U' && RNA) return 'A';
429445
if (base == 'C') return 'G';
430446
if (base == 'G') return 'C';
431447
return ' ';
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package org.biojava.nbio.structure.basepairs;
2+
3+
import org.biojava.nbio.structure.Atom;
4+
import org.biojava.nbio.structure.Chain;
5+
import org.biojava.nbio.structure.Group;
6+
import org.biojava.nbio.structure.Structure;
7+
8+
import javax.vecmath.Matrix4d;
9+
import java.util.ArrayList;
10+
import java.util.List;
11+
12+
/**
13+
* Created by luke on 7/22/17.
14+
*/
15+
public class TertiaryBasePairParameters extends BasePairParameters {
16+
17+
public TertiaryBasePairParameters(Structure structure, boolean RNA, boolean removeDups) {
18+
super(structure, RNA, removeDups);
19+
}
20+
21+
@Override
22+
public List<Group[]> findPairs(List<Chain> chains) {
23+
List<Group[]> result = new ArrayList<>();
24+
for (int i = 0; i < chains.size(); i++) {
25+
Chain c = chains.get(i);
26+
String sequence = c.getSeqResSequence();
27+
for (int j = 0; j < sequence.length(); j++) {
28+
for (int k = sequence.length()-1; k >= j + 4; k--) {
29+
Group g1 = c.getSeqResGroup(j);
30+
Group g2 = c.getSeqResGroup(k);
31+
Integer type1 = map.get(g1.getPDBName());
32+
Integer type2 = map.get(g2.getPDBName());
33+
if (type1 == null || type2 == null) continue;
34+
Atom a1 = g1.getAtom("C1'");
35+
Atom a2 = g2.getAtom("C1'");
36+
if (a1 == null || a2 == null) continue;
37+
// C1'-C1' distance is one useful criteria
38+
if (Math.abs(a1.getCoordsAsPoint3d().distance(a2.getCoordsAsPoint3d())-10.0) > 5.0) continue;
39+
Group[] ga = new Group[] {g1, g2};
40+
Matrix4d data = basePairReferenceFrame(ga);
41+
// if the stagger is greater than 2 Å, it's not really paired.
42+
if (Math.abs(pairParameters[5]) > 2.0) continue;
43+
// if the propeller is ridiculous it's also not that good of a pair.
44+
if (Math.abs(pairParameters[1]) > 60.0) {
45+
continue;
46+
}
47+
result.add(ga);
48+
pairingNames.add(useRNA ? baseListRNA[type1]+baseListRNA[type2]: baseListDNA[type1]+baseListDNA[type2]);
49+
}
50+
}
51+
}
52+
result.addAll(super.findPairs(chains));
53+
return result;
54+
}
55+
56+
57+
}

biojava-structure/src/test/java/org/biojava/nbio/structure/basepairs/TestBasePairParameters.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public void testBasePair() {
2626
structure = null;
2727
assertEquals(1, 2);
2828
}
29-
BasePairParameters bp = new BasePairParameters(structure, false, false);
29+
BasePairParameters bp = new BasePairParameters(structure);
3030
double[][] pairs = bp.getPairingParameters();
3131
double[][] steps = bp.getStepParameters();
3232
String sequence = bp.getPairSequence();
@@ -50,6 +50,16 @@ public void testBasePair() {
5050
assertEquals(steps[1][4], -0.607, 0.01);
5151
assertEquals(steps[1][5], 3.070, 0.01);
5252

53+
try {
54+
structure = StructureIO.getStructure("3PHP");
55+
} catch (IOException|StructureException e) {
56+
e.printStackTrace();
57+
structure = null;
58+
assertEquals(1, 2);
59+
}
60+
bp = new TertiaryBasePairParameters(structure, true, false);
61+
assertEquals(bp.getPairingParameters().length, 8);
62+
5363
}
5464

5565
}

0 commit comments

Comments
 (0)