Skip to content

Commit 20010a5

Browse files
committed
Add more examples for the MultipleAlignment algorithm
1 parent 157d3fa commit 20010a5

File tree

8 files changed

+36
-29
lines changed

8 files changed

+36
-29
lines changed

biojava-structure-gui/src/main/java/demo/DemoMultipleMC.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
import org.biojava.nbio.structure.align.util.AtomCache;
1717

1818
/**
19-
* Demo for running the CEMC Algorithm on a protein family and visualizing the results.
19+
* Demo for running the CEMC Algorithm on a protein family and
20+
* visualizing the results.
2021
* Choose the family by commenting out the protein family names.
2122
*
2223
* @author Aleix Lafita
@@ -26,13 +27,12 @@ public class DemoMultipleMC {
2627

2728
public static void main(String[] args) throws IOException, StructureException, InterruptedException, ExecutionException {
2829

29-
//Specify the structures to align
3030
//ASP-proteinases (CEMC paper)
3131
//List<String> names = Arrays.asList("3app", "4ape", "2apr", "5pep", "1psn", "4cms", "1bbs.A", "1smr.A", "2jxr.A", "1mpp", "2asi", "1am5");
3232
//Protein Kinases (CEMC paper)
3333
//List<String> names = Arrays.asList("1cdk.A", "1cja.A", "1csn", "1b6c.B", "1ir3.A", "1fgk.A", "1byg.A", "1hck", "1blx.A", "3erk", "1bmk.A", "1kob.A", "1tki.A", "1phk", "1a06");
3434
//DHFR (Gerstein 1998 paper)
35-
List<String> names = Arrays.asList("d1dhfa_", "8dfr", "d4dfra_", "3dfr");
35+
//List<String> names = Arrays.asList("d1dhfa_", "8dfr", "d4dfra_", "3dfr");
3636
//TIM barrels (MUSTA paper)
3737
//List<String> names = Arrays.asList("1tim.A", "1vzw", "1nsj", "3tha.A", "4enl", "2mnr", "7tim.A", "1tml", "1btc", "a1piia1", "6xia", "5rub.A", "2taa.B");
3838
//Calcium Binding (MUSTA paper)
@@ -54,7 +54,13 @@ public static void main(String[] args) throws IOException, StructureException, I
5454
//Circular Permutations: SAND and MFPT domains
5555
//List<String> names = Arrays.asList("d2bjqa1", "d1h5pa_", "d1ufna_"); //"d1oqja"
5656
//Flexible domain family of proteins (FatCat paper?)
57-
57+
//Ammonium Transporters (Aleix Bachelor's Thesis)
58+
//List<String> names = Arrays.asList("1xqf.A","2b2f.A", "3b9w.A","3hd6.A");
59+
//Cytochrome C Oxidases (Aleix Bachelor's Thesis)
60+
//List<String> names = Arrays.asList("2dyr.A","2gsm.A","2yev.A","3hb3.A","3omn.A","1fft.A","1xme.A","3o0r.B","3ayf.A");
61+
List<String> names = Arrays.asList("2dyr.A","2gsm.A","1fft.A","1xme.A","3o0r.B","3ayf.A");
62+
//Cation Transporting ATPases (Aleix Bachelor's Thesis)
63+
//List<String> names = Arrays.asList("3b8e.A","2zxe.A", "3tlm.A","1iwo.A");
5864
//Ankyrin Repeats
5965
//List<String> names = Arrays.asList("d1n0ra_", "3ehq.A", "1awc.B"); //ankyrin
6066

biojava-structure/src/main/java/demo/DemoMultipleMC.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public static void main(String[] args) throws IOException, StructureException, I
3434
//Protein Kinases (CEMC paper)
3535
//List<String> names = Arrays.asList("1cdk.A", "1cja.A", "1csn", "1b6c.B", "1ir3.A", "1fgk.A", "1byg.A", "1hck", "1blx.A", "3erk", "1bmk.A", "1kob.A", "1tki.A", "1phk", "1a06");
3636
//DHFR (Gerstein 1998 paper)
37-
List<String> names = Arrays.asList("d1dhfa_", "8dfr", "d4dfra_", "3dfr");
37+
//List<String> names = Arrays.asList("d1dhfa_", "8dfr", "d4dfra_", "3dfr");
3838
//TIM barrels (MUSTA paper)
3939
//List<String> names = Arrays.asList("1tim.A", "1vzw", "1nsj", "3tha.A", "4enl", "2mnr", "7tim.A", "1tml", "1btc", "a1piia1", "6xia", "5rub.A", "2taa.B");
4040
//Calcium Binding (MUSTA paper)
@@ -55,8 +55,12 @@ public static void main(String[] args) throws IOException, StructureException, I
5555
//List<String> names = Arrays.asList("d1u0la2", "d1jwyb_");
5656
//Circular Permutations: SAND and MFPT domains
5757
//List<String> names = Arrays.asList("d2bjqa1", "d1h5pa_", "d1ufna_"); //"d1oqja"
58-
//Flexible domain family of proteins (FatCat paper?)
59-
58+
//Ammonium Transporters (Aleix Bachelor's Thesis)
59+
//List<String> names = Arrays.asList("1xqf.A","2b2f.A", "3b9w.A","3hd6.A");
60+
//Cytochrome C Oxidases (Aleix Bachelor's Thesis)
61+
//List<String> names = Arrays.asList("2dyr.A","2gsm.A","2yev.A","3hb3.A","3omn.A","1fft.A","1xme.A","3o0r.B","3ayf.A");
62+
//Cation Transporting ATPases (Aleix Bachelor's Thesis)
63+
List<String> names = Arrays.asList("3b8e.A","2zxe.A", "3tlm.A","1iwo.A");
6064
//Ankyrin Repeats
6165
//List<String> names = Arrays.asList("d1n0ra_", "3ehq.A", "1awc.B"); //ankyrin
6266

@@ -70,7 +74,8 @@ public static void main(String[] args) throws IOException, StructureException, I
7074
//Here the multiple structural alignment algorithm comes into play to generate the alignment object
7175
MultipleMcMain algorithm = new MultipleMcMain(new CeCPMain());
7276
MultipleMcParameters params = (MultipleMcParameters) algorithm.getParameters();
73-
params.setMinBlockLen(10);
77+
params.setMinBlockLen(15);
78+
params.setMinAlignedStructures(10);
7479

7580
MultipleAlignment result = algorithm.align(atomArrays);
7681
result.getEnsemble().setStructureNames(names);

biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,7 @@ public static final Atom[] getRepresentativeAtomArray(Chain c) {
621621
public static final Atom[] cloneCAArray(Atom[] ca) {
622622
return cloneAtomArray(ca);
623623
}
624+
624625
/** Provides an equivalent copy of Atoms in a new array. Clones everything, starting with parent
625626
* groups and chains. The chain will only contain groups that are part of the input array.
626627
*

biojava-structure/src/main/java/org/biojava/nbio/structure/align/helper/AlignTools.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -160,13 +160,15 @@ public static double rms_dk_diag(double[] dk1, double[] dk2, int i, int j, int l
160160

161161
}
162162

163-
/** matrix of all distances between two sets of 3d coords"
163+
/**
164+
* Matrix of all distances between two sets of Atoms. Does not
165+
* superimpose or modify the Atoms.
164166
*
165167
* @param ca1
166168
* @param ca2
167-
* @return a Matrixd
169+
* @return a Matrix
168170
*/
169-
public static Matrix getDistanceMatrix(Atom[] ca1, Atom[]ca2){
171+
public static Matrix getDistanceMatrix(Atom[] ca1, Atom[] ca2){
170172

171173
int r = ca1.length;
172174
int c = ca2.length;
@@ -180,11 +182,9 @@ public static Matrix getDistanceMatrix(Atom[] ca1, Atom[]ca2){
180182

181183
double d = Calc.getDistance(a1,b1);
182184
out.set(i,j,d);
183-
184185
}
185186
}
186187
return out;
187188
}
188189

189-
190190
}

biojava-structure/src/main/java/org/biojava/nbio/structure/align/multiple/MultipleAlignmentEnsembleImpl.java

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.biojava.nbio.structure.Calc;
1313
import org.biojava.nbio.structure.StructureException;
1414
import org.biojava.nbio.structure.StructureTools;
15+
import org.biojava.nbio.structure.align.helper.AlignTools;
1516
import org.biojava.nbio.structure.align.model.AFPChain;
1617
import org.biojava.nbio.structure.align.util.AtomCache;
1718
import org.biojava.nbio.structure.jama.Matrix;
@@ -305,18 +306,8 @@ public void updateDistanceMatrix() {
305306
distanceMatrix = new ArrayList<Matrix>();
306307

307308
for (int s=0; s<size(); s++){
308-
int n = atomArrays.get(s).length;
309-
Matrix distMat = new Matrix(n,n);
310-
311-
//Calculate all distances between every pair of atoms
312-
for (int a1=0; a1<n; a1++){
313-
for (int a2=0; a2<n; a2++){
314-
Atom at1 = atomArrays.get(s)[a1];
315-
Atom at2 = atomArrays.get(s)[a2];
316-
double dist = Calc.getDistance(at1, at2);
317-
distMat.set(a1, a2, dist);
318-
}
319-
}
309+
Atom[] ca = atomArrays.get(s);
310+
Matrix distMat =AlignTools.getDistanceMatrix(ca, ca);
320311
distanceMatrix.add(distMat);
321312
}
322313
}

biojava-structure/src/main/java/org/biojava/nbio/structure/align/multiple/mc/MultipleMcMain.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,8 @@ public MultipleAlignment align(List<Atom[]> atomArrays, Object params)
313313
List<Future<MultipleAlignment>> afpFuture =
314314
new ArrayList<Future<MultipleAlignment>>();
315315

316-
//Repeat the optimization in parallel 5 times
317-
for (int i=0; i<5; i++){
316+
//Repeat the optimization in parallel
317+
for (int i=0; i<2; i++){
318318
//Change the random seed for each parallelization
319319
MultipleMcParameters paramsMC = (MultipleMcParameters) params;
320320
paramsMC.setRandomSeed(paramsMC.getRandomSeed()+i);

biojava-structure/src/main/java/org/biojava/nbio/structure/align/multiple/mc/MultipleMcOptimizer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
public class MultipleMcOptimizer
4444
implements Callable<MultipleAlignment> {
4545

46-
private static final boolean debug = false;
46+
private static final boolean debug = true;
4747
private Random rnd;
4848
private MultipleSuperimposer imposer;
4949

biojava-structure/src/main/java/org/biojava/nbio/structure/align/util/AlignmentTools.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,13 @@
3434
import java.util.regex.Pattern;
3535

3636
/**
37-
* Some utility methods for analyzing and manipulating AFPChains.
37+
* Methods for analyzing and manipulating AFPChains and for
38+
* other pairwise alignment utilities. <p>
39+
* Current methods: replace optimal alignment, create new AFPChain,
40+
* format conversion, update superposition, etc.
3841
*
3942
* @author Spencer Bliven
43+
* @author Aleix Lafita
4044
*
4145
*/
4246
public class AlignmentTools {

0 commit comments

Comments
 (0)