Skip to content

Commit e256a92

Browse files
author
luke czapla
committed
Moved base pair code into new package basepairs
1 parent 4632f96 commit e256a92

File tree

3 files changed

+155
-84
lines changed

3 files changed

+155
-84
lines changed

biojava-structure/src/main/java/org/biojava/nbio/structure/secstruc/BasePairParameters.java renamed to biojava-structure/src/main/java/org/biojava/nbio/structure/basepairs/BasePairParameters.java

Lines changed: 99 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package org.biojava.nbio.structure.secstruc;
1+
package org.biojava.nbio.structure.basepairs;
22

33
import org.biojava.nbio.structure.*;
44
import org.biojava.nbio.structure.geometry.SuperPosition;
@@ -21,12 +21,21 @@
2121
* This module calculates the el Hassan-Calladine Base Pairing and Base-pair Step Parameters
2222
* Citation: https://www.ncbi.nlm.nih.gov/pubmed/11601858
2323
*
24-
* Created by luke on 7/20/17.
24+
* The method that might be overridden is findPairs(), this implementation is used for a large-scale
25+
* analysis of the most proper helical regions in almost 4000 protein-DNA structures, almost
26+
* 2000 structures containing only DNA, or almost 1300 structures containing only RNA. (as of 7/2017).
27+
* Those who study tertiary structures for RNA folding would be better using their own method,
28+
* because this is only looking for base pairs between separate strands.
29+
*
30+
* Created by luke czapla on 7/20/17.
2531
*/
2632
public class BasePairParameters {
2733

2834
private static Logger log = LoggerFactory.getLogger(BasePairParameters.class);
29-
35+
36+
// See URL http://ndbserver.rutgers.edu/ndbmodule/archives/reports/tsukuba/Table1.html
37+
// and the paper cited at the top of this class (also as Table 1).
38+
// These are hard-coded to avoid problems with resource paths.
3039
private static String[] standardBases = new String[] {
3140
"SEQRES 1 A 1 A\n" +
3241
"ATOM 2 N9 A A 1 -1.291 4.498 0.000\n" +
@@ -68,17 +77,18 @@ public class BasePairParameters {
6877
"END"
6978
};
7079

71-
private static String[] baseListDNA = {"A", "G", "T", "C"};
72-
private static String[] baseListRNA = {"A", "G", "U", "C"};
80+
// this is also hard-coded data about standard WC base pairs for both DNA and RNA
81+
//private static String[] baseListDNA = {"A", "G", "T", "C"};
82+
//private static String[] baseListRNA = {"A", "G", "U", "C"};
7383
private static Map<String, Integer> map;
7484
private static Map<Integer, List<String>> ringMap;
7585
static {
7686
map = new HashMap<>();
7787
map.put("DA", 0); map.put("ADE", 0); map.put("A", 0);
7888
map.put("DG", 1); map.put("GUA", 1); map.put("G", 1);
79-
map.put("DT", 2); map.put("THY", 2); map.put("T", 2); //RNA lines: map.put("U", 2); map.put("URA", 2);
89+
map.put("DT", 2); map.put("THY", 2); map.put("T", 2); map.put("U", 2); map.put("URA", 2);
8090
map.put("DC", 3); map.put("CYT", 3); map.put("C", 3);
81-
// chemically modified bases, leaving out right now.
91+
// chemically modified bases, leaving out to ignore (to treat as gaps) right now.
8292
//map.put("DZM", 0);
8393
//map.put("UCL", 2);
8494
//map.put("2DT", 2);
@@ -91,70 +101,108 @@ public class BasePairParameters {
91101
}
92102

93103
private Structure structure;
94-
private String pairSequence = "", pdbId = "";
95-
104+
private boolean useRNA = false;
96105
private double[] pairParameters;
106+
107+
// this is the main data that you want to get back out from the procedure.
108+
private String pairSequence = "";
97109
private double[][] pairingParameters;
98110
private double[][] stepParameters;
99111

100112

101-
// Either pass the name of a PDB file (ending in .pdb) or pass the pdbId
102-
public BasePairParameters(String name) {
103-
PDBFileReader pdbFileReader = new PDBFileReader();
104-
pdbFileReader.setPath(".");
105-
try {
106-
if (name.contains(".pdb")) structure = pdbFileReader.getStructure(name);
107-
else structure = StructureIO.getStructure(name);
108-
pdbId = name.replace(".pdb", "");
109-
List<Chain> nucleics = this.getNucleicChains(false);
110-
List<Group[]> pairs = this.findPairs(nucleics);
111-
pairingParameters = new double[pairs.size()][6];
112-
stepParameters = new double[pairs.size()][6];
113-
Matrix4d lastStep = null;
114-
Matrix4d currentStep = null;
115-
for (int i = 0; i < pairs.size(); i++) {
116-
lastStep = currentStep;
117-
currentStep = this.basePairReferenceFrame(pairs.get(i));
118-
for (int j = 0; j < 6; j++) pairingParameters[i][j] = pairParameters[j];
119-
if (i != 0) {
120-
lastStep.invert();
121-
lastStep.mul(currentStep);
122-
double[] sparms = calculatetp(lastStep);
123-
for (int j = 0; j < 6; j++) stepParameters[i][j] = sparms[j];
124-
}
125-
; }
126-
} catch (IOException|StructureException e) {
127-
log.info("Error reading file from local drive or internet");
128-
structure = null;
113+
/**
114+
* Constructor takes a Structure object, finds base pair and base-pair step parameters
115+
* for double-helical regions within the structure.
116+
* @param structure The already-loaded structure to analyze.
117+
* @param useRNA whether to look for canonical RNA pairs. By default (false) it analyzes DNA.
118+
* @param removeDups whether to only look for base-pair parameters for each unique sequence in
119+
* the structure (if set to <i>true</i>)
120+
*/
121+
public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups) {
122+
this.structure = structure;
123+
this.useRNA = useRNA;
124+
if (structure == null) {
125+
pairingParameters = null;
126+
stepParameters = null;
127+
return;
129128
}
129+
List<Chain> nucleics = this.getNucleicChains(removeDups);
130+
List<Group[]> pairs = this.findPairs(nucleics);
131+
pairingParameters = new double[pairs.size()][6];
132+
stepParameters = new double[pairs.size()][6];
133+
Matrix4d lastStep = null;
134+
Matrix4d currentStep = null;
135+
for (int i = 0; i < pairs.size(); i++) {
136+
lastStep = currentStep;
137+
currentStep = this.basePairReferenceFrame(pairs.get(i));
138+
for (int j = 0; j < 6; j++) pairingParameters[i][j] = pairParameters[j];
139+
if (i != 0) {
140+
lastStep.invert();
141+
lastStep.mul(currentStep);
142+
double[] sparms = calculatetp(lastStep);
143+
for (int j = 0; j < 6; j++) stepParameters[i][j] = sparms[j];
144+
}
145+
; }
146+
130147
}
131148

149+
150+
/**
151+
* Constructor takes a Structure object, finds base pair and base-pair step parameters
152+
* for double-helical regions within the structure for only canonical DNA pairs.
153+
* @param structure The already-loaded structure to analyze.
154+
*/
155+
public BasePairParameters(Structure structure) {
156+
this(structure, false, false);
157+
}
158+
159+
/**
160+
* This reports all the pair parameters, in the order of:
161+
* buckle, propeller, opening (in degrees), shear, stagger, stretch (in Å).
162+
* @return A double[][] with length equal to number of base pairs for rows, and 6 columns
163+
*/
132164
public double[][] getPairingParameters() {
133165
return pairingParameters;
134166
}
135167

168+
/**
169+
* This reports all the base-pair step parameters, in the order of:
170+
* tilt, roll, twist (in degrees), shift, slide, rise (in Å).
171+
* @return A double[][] with length equal to number of base pairs (the first row 0 has no step
172+
* and therefore is six zeroes), and 6 columns.
173+
*/
136174
public double[][] getStepParameters() {
137175
return stepParameters;
138176
}
139177

178+
/**
179+
* This returns the primary strand's sequence where parameters were found.
180+
* There are spaces in the string anywhere there was a break in the helix or when
181+
* it goes from one helix to another helix in the structure. (the "step" is still returned!)
182+
* @return String of primary sequence with spaces between gaps and new helices.
183+
*/
140184
public String getPairSequence() {
141185
return pairSequence;
142186
}
143187

188+
/**
189+
* This reports all the nucleic acid chains and has an option to remove duplicates if you
190+
* are considering an analyze of only unique DNA or RNA helices in the Structure.
191+
* @param removeDups If true, it will ignore duplicate chains
192+
* @return A list of all the nucleic acid chains in order of the Structure
193+
*/
144194
public List<Chain> getNucleicChains(boolean removeDups) {
145195
if (structure == null) return new ArrayList<>();
146196
List<Chain> chains = structure.getChains();
147197
List<Chain> result = new ArrayList<>();
148198
for (Chain c: chains) {
149-
//EntityInfo ei = c.getEntityInfo();
150199
if (c.isNucleicAcid()) {
151200
result.add(c);
152201
}
153-
//result.add(c);
154202
}
155203
if (removeDups) for (int i = 0; i < result.size(); i++) {
156204
for (int j = i+2; j < result.size(); j++) {
157-
// remove double
205+
// remove duplicate sequences (structures with two or more identical units)
158206
if (result.get(i).getSeqResSequence().equals(result.get(j).getSeqResSequence())) {
159207
result.remove(j);
160208
}
@@ -164,12 +212,19 @@ public List<Chain> getNucleicChains(boolean removeDups) {
164212
}
165213

166214

215+
/**
216+
* This performs a search for base pairs in the structure. The criteria is alignment of
217+
* sequences and the canonical base pairs of DNA and RNA.
218+
* @param chains The list of chains already found to be nucleic acids
219+
* @return The list of corresponding Watson-Crick groups as pairs, element 0 is on the
220+
* forward strand and element 1 is on the reverse strand.
221+
*/
167222
public List<Group[]> findPairs(List<Chain> chains) {
168223
List<Group[]> result = new ArrayList<>();
169224
for (int i = 0; i < chains.size(); i++) {
170225
Chain c = chains.get(i);
171226
for (int j = i+1; j < chains.size(); j++) {
172-
String complement = complement(chains.get(j).getSeqResSequence(), false);
227+
String complement = complement(chains.get(j).getSeqResSequence(), useRNA);
173228
String match = longestCommonSubstring(c.getSeqResSequence(), complement);
174229
//log.info(c.getSeqResSequence() + " " + chains.get(j).getSeqResSequence() + " " + match);
175230
int index1 = c.getSeqResSequence().indexOf(match);
@@ -187,12 +242,12 @@ public List<Group[]> findPairs(List<Chain> chains) {
187242
Atom a2 = g2.getAtom(ringMap.get(type2).get(0));
188243

189244
if (a1 == null) {
190-
log.info("Error processing " + g1.getPDBName() + " in " + pdbId);
245+
log.info("Error processing " + g1.getPDBName());
191246
if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
192247
continue;
193248
}
194249
if (a2 == null) {
195-
log.info("Error processing " + g2.getPDBName() + " in " + pdbId);
250+
log.info("Error processing " + g2.getPDBName());
196251
if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
197252
continue;
198253
}
@@ -203,7 +258,7 @@ public List<Group[]> findPairs(List<Chain> chains) {
203258
double distance = Math.sqrt(dx*dx+dy*dy+dz*dz);
204259
//log.info("C8-C6 Distance (Å): " + distance);
205260
// could be a base pair
206-
if (Math.abs(distance-10.0) < 2.0) {
261+
if (Math.abs(distance-10.0) < 2.5) {
207262
boolean valid = true;
208263
for (String atomname : ringMap.get(type1)) {
209264
Atom a = g1.getAtom(atomname);
@@ -232,7 +287,6 @@ public List<Group[]> findPairs(List<Chain> chains) {
232287
}
233288

234289

235-
236290
public Matrix4d basePairReferenceFrame(Group[] pair) {
237291
Integer type1 = map.get(pair[0].getPDBName());
238292
Integer type2 = map.get(pair[1].getPDBName());
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package org.biojava.nbio.structure.basepairs;
2+
3+
import org.biojava.nbio.structure.Structure;
4+
import org.biojava.nbio.structure.StructureException;
5+
import org.biojava.nbio.structure.StructureIO;
6+
import org.biojava.nbio.structure.basepairs.BasePairParameters;
7+
import org.junit.Test;
8+
9+
import java.io.IOException;
10+
11+
import static org.junit.Assert.*;
12+
13+
/**
14+
* Created by luke czapla on 7/21/17.
15+
*/
16+
public class TestBasePairParameters {
17+
18+
@Test
19+
public void testBasePair() {
20+
21+
Structure structure;
22+
try {
23+
structure = StructureIO.getStructure("1KX5");
24+
} catch (IOException|StructureException e) {
25+
e.printStackTrace();
26+
structure = null;
27+
assertEquals(1, 2);
28+
}
29+
BasePairParameters bp = new BasePairParameters(structure, false);
30+
double[][] pairs = bp.getPairingParameters();
31+
double[][] steps = bp.getStepParameters();
32+
String sequence = bp.getPairSequence();
33+
34+
assertEquals(sequence.trim().length(), 147);
35+
// below all this set of comparator data was from an external program, 3DNA.
36+
// next three in degrees: buckle, propeller, opening
37+
assertEquals(pairs[0][0], -3.796, 0.1);
38+
assertEquals(pairs[0][1], 4.482, 0.1);
39+
assertEquals(pairs[0][2], -0.730, 0.1);
40+
// next three in Å: shear, stretch, stagger
41+
assertEquals(pairs[0][3], -0.324, 0.01);
42+
assertEquals(pairs[0][4], -0.578, 0.01);
43+
assertEquals(pairs[0][5], -0.336, 0.01);
44+
// next three in degrees: tilt, roll, twist
45+
assertEquals(steps[1][0], 2.354, 0.1);
46+
assertEquals(steps[1][1], 0.785, 0.1);
47+
assertEquals(steps[1][2], 32.522, 1.0);
48+
// next three in Å, shift, slide, rise
49+
assertEquals(steps[1][3], -0.873, 0.01);
50+
assertEquals(steps[1][4], -0.607, 0.01);
51+
assertEquals(steps[1][5], 3.070, 0.01);
52+
53+
}
54+
55+
}
56+

biojava-structure/src/test/java/org/biojava/nbio/structure/secstruc/TestBasePairParameters.java

Lines changed: 0 additions & 39 deletions
This file was deleted.

0 commit comments

Comments
 (0)