Skip to content

Commit 751e6f0

Browse files
committed
Create templates for MultipleAlignment CLI biojava#278
Parameters, StartupParameters and UserArgumentProcessor classes. The old CEMC classes have been renamed to more general names, since the new version supports any pairwise algorithm to generate the seed.
1 parent 2850758 commit 751e6f0

File tree

12 files changed

+651
-157
lines changed

12 files changed

+651
-157
lines changed

biojava-structure-gui/src/main/java/demo/DemoCEMC.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88

99
import org.biojava.nbio.structure.Atom;
1010
import org.biojava.nbio.structure.StructureException;
11-
import org.biojava.nbio.structure.align.cemc.CeMcMain;
1211
import org.biojava.nbio.structure.align.gui.MultipleAlignmentDisplay;
1312
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
13+
import org.biojava.nbio.structure.align.multiple.mc.MultipleMcMain;
1414
import org.biojava.nbio.structure.align.util.AtomCache;
1515

1616
/**
@@ -34,7 +34,7 @@ public static void main(String[] args) throws IOException, StructureException, I
3434
//TIM barrels (MUSTA paper)
3535
//List<String> names = Arrays.asList("1tim.A", "1vzw", "1nsj", "3tha.A", "4enl", "2mnr", "7tim.A", "1tml", "1btc", "a1piia1", "6xia", "5rub.A", "2taa.B");
3636
//Helix-bundle (MUSTA paper)
37-
List<String> names = Arrays.asList("1bbh.A", "1aep", "1bge.B", "256b.A", "2ccy.A", "2hmz.A", "3ink.C");
37+
//List<String> names = Arrays.asList("1bbh.A", "1aep", "1bge.B", "256b.A", "2ccy.A", "2hmz.A", "3ink.C");
3838
//Calcium Binding (MUSTA paper)
3939
//List<String> names = Arrays.asList("4cpv", "2scp.A", "2sas", "1top", "1scm.B", "3icb");
4040
//Serine Rich Proteins SERP (MUSTA paper)
@@ -44,8 +44,8 @@ public static void main(String[] args) throws IOException, StructureException, I
4444
//GPCRs
4545
//List<String> names = Arrays.asList("2z73.A", "1u19.A", "4ug2.A", "4xt3", "4or2.A", "3odu.A");
4646
//Immunoglobulins (MAMMOTH paper)
47-
//List<String> names = Arrays.asList("2hla.B", "3hla.B", "1cd8", "2rhe", "1tlk", "1ten", "1ttf");
48-
//Globins (MAMMOTH and MUSTA papers)
47+
List<String> names = Arrays.asList("2hla.B", "3hla.B", "1cd8", "2rhe", "1tlk", "1ten", "1ttf");
48+
//Globins (MAMMOTH, POSA, Gerstein&Levitt and MUSTA papers)
4949
//List<String> names = Arrays.asList("1mbc", "1hlb", "1thb.A", "1ith.A", "1idr.A", "1dlw", "1kr7.A", "1ew6.A", "1it2.A", "1eco", "3sdh.A", "1cg5.B", "1fhj.B", "1ird.A", "1mba", "2gdm", "1b0b", "1h97.A", "1ash.A", "1jl7.A");
5050
//Rossman-Fold (POSA paper)
5151
//List<String> names = Arrays.asList("d1heta2", "d1ek6a_", "d1obfo1", "2cmd", "d1np3a2", "d1bgva1", "d1id1a_", "d1id1a_", "d1oi7a1");
@@ -66,7 +66,7 @@ public static void main(String[] args) throws IOException, StructureException, I
6666
}
6767

6868
//Here the multiple structural alignment algorithm comes in place to generate the alignment object
69-
CeMcMain algorithm = new CeMcMain();
69+
MultipleMcMain algorithm = new MultipleMcMain();
7070
MultipleAlignment result = algorithm.align(atomArrays);
7171
result.getEnsemble().setStructureNames(names);
7272

biojava-structure/src/main/java/org/biojava/nbio/structure/align/cemc/CeMcParameters.java

Lines changed: 0 additions & 34 deletions
This file was deleted.

biojava-structure/src/main/java/org/biojava/nbio/structure/align/multiple/BlockImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public class BlockImpl extends AbstractScoresCache implements Serializable, Bloc
1919
private int coreLength; //number of residues aligned without gaps (cache)
2020

2121
/**
22-
* Constructor.
22+
* Constructor. Also links the parent to this instance.
2323
* @param blockSet the parent BlockSet of the BlockImpl instance.
2424
* @return BlockImpl a BlockImpl instance linked to its parent BlockSet.
2525
*/

biojava-structure/src/main/java/org/biojava/nbio/structure/align/multiple/BlockSetImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ public class BlockSetImpl extends AbstractScoresCache implements Serializable, B
2525
private int coreLength; //number of aligned positions without gaps (cache)
2626

2727
/**
28-
* Constructor.
28+
* Constructor. Also links the parent to this instance.
2929
* @param multipleAlignment the parent MultipleAlignment of the BlockSetImpl instance.
3030
* @return BlockSetImpl a BlockSetImpl instance linked to its parent MultipleAlignment.
3131
*/

biojava-structure/src/main/java/org/biojava/nbio/structure/align/multiple/MultipleAlignmentScorer.java

Lines changed: 17 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public class MultipleAlignmentScorer {
4141
public static void calculateScores(MultipleAlignment alignment) throws StructureException {
4242

4343
//Put RMSD
44-
List<Atom[]> transformed = transformAtoms(alignment);
44+
List<Atom[]> transformed = MultipleAlignmentTools.transformAtoms(alignment);
4545
alignment.putScore(RMSD, getRMSD(transformed));
4646

4747
//Put TM-Score
@@ -52,82 +52,6 @@ public static void calculateScores(MultipleAlignment alignment) throws Structure
5252
alignment.putScore(AVG_TMSCORE, getAvgTMScore(transformed,lengths));
5353
}
5454

55-
/**
56-
* Transforms atoms according to the superposition stored in the alignment.
57-
* <p>
58-
* For each structure in the alignment, returns an atom for each
59-
* representative atom in the aligned columns, omitting unaligned residues
60-
* (i.e. an array of length <tt>alignment.length()</tt> ).
61-
* <p>
62-
* All blocks are concatenated together, so Atoms may not appear in the
63-
* same order as in their parent structure. If the alignment blocks contain
64-
* null residues (gaps), then the returned array will also contain null Atoms.
65-
*
66-
* @param alignment MultipleAlignment
67-
* @return
68-
*/
69-
public static List<Atom[]> transformAtoms(MultipleAlignment alignment) {
70-
if(alignment.getEnsemble() == null ) {
71-
throw new NullPointerException("No ensemble set for this alignment");
72-
}
73-
74-
List<Atom[]> atomArrays = alignment.getEnsemble().getAtomArrays();
75-
List<Atom[]> transformed = new ArrayList<Atom[]>(atomArrays.size());
76-
77-
//Loop through structures
78-
for (int i=0; i<atomArrays.size(); i++){
79-
80-
Matrix4d transform = null;
81-
if( alignment.getTransformations() != null) {
82-
transform = alignment.getTransformations().get(i);
83-
}
84-
Atom[] curr = atomArrays.get(i); // all CA atoms from structure
85-
86-
//Concatenated list of all blocks for this structure
87-
Atom[] transformedAtoms = new Atom[alignment.length()];
88-
int transformedAtomsLength = 0;
89-
90-
// Each blockset gets transformed independently
91-
for( BlockSet bs : alignment.getBlockSets()) {
92-
93-
Atom[] blocksetAtoms = new Atom[bs.length()];
94-
95-
for( Block blk : bs.getBlocks() ) {
96-
if( blk.size() != atomArrays.size()) {
97-
throw new IllegalStateException(String.format(
98-
"Mismatched block length. Expected %d structures, found %d.",
99-
atomArrays.size(),blk.size() ));
100-
}
101-
//Extract aligned atoms
102-
for (int j=0; j<blk.length(); j++){
103-
Integer alignedPos = blk.getAlignRes().get(i).get(j);
104-
if (alignedPos != null) {
105-
blocksetAtoms[j] = (Atom) curr[alignedPos].clone();
106-
}
107-
}
108-
}
109-
110-
// transform according to (1) the blockset matrix, or (2) the alignment matrix
111-
Matrix4d blockTrans = null;
112-
if(bs.getTransformations() != null)
113-
blockTrans = bs.getTransformations().get(i);
114-
if(blockTrans == null) {
115-
blockTrans = transform;
116-
}
117-
118-
for(Atom a : blocksetAtoms) {
119-
if (a!=null) Calc.transform(a, blockTrans);
120-
transformedAtoms[transformedAtomsLength] = a;
121-
transformedAtomsLength++;
122-
}
123-
}
124-
assert(transformedAtomsLength == alignment.length());
125-
126-
transformed.add(transformedAtoms);
127-
}
128-
return transformed;
129-
}
130-
13155
/**
13256
* Calculates the RMSD of all-to-all structure comparisons (distances) of the
13357
* given MultipleAlignment. <p>
@@ -142,7 +66,7 @@ public static List<Atom[]> transformAtoms(MultipleAlignment alignment) {
14266
* @return double RMSD
14367
*/
14468
public static double getRMSD(MultipleAlignment alignment) {
145-
List<Atom[]> transformed = transformAtoms(alignment);
69+
List<Atom[]> transformed = MultipleAlignmentTools.transformAtoms(alignment);
14670
return getRMSD(transformed);
14771
}
14872
/**
@@ -183,7 +107,7 @@ private static double getRMSD(List<Atom[]> transformed) {
183107
}
184108

185109
public static double getRefRMSD(MultipleAlignment alignment, int reference) {
186-
List<Atom[]> transformed = transformAtoms(alignment);
110+
List<Atom[]> transformed = MultipleAlignmentTools.transformAtoms(alignment);
187111
return getRefRMSD(transformed,reference);
188112
}
189113
/**
@@ -244,7 +168,7 @@ private static double getRefRMSD(List<Atom[]> transformed, int reference) {
244168
* @throws StructureException
245169
*/
246170
public static double getAvgTMScore(MultipleAlignment alignment) throws StructureException {
247-
List<Atom[]> transformed = transformAtoms(alignment);
171+
List<Atom[]> transformed = MultipleAlignmentTools.transformAtoms(alignment);
248172
List<Integer> lengths = new ArrayList<Integer>(alignment.size());
249173
for(Atom[] atoms : alignment.getEnsemble().getAtomArrays()) {
250174
lengths.add(atoms.length);
@@ -304,7 +228,7 @@ private static double getAvgTMScore(List<Atom[]> transformed, List<Integer> leng
304228
* @throws StructureException
305229
*/
306230
public static double getRefTMScore(MultipleAlignment alignment, int reference) throws StructureException {
307-
List<Atom[]> transformed = transformAtoms(alignment);
231+
List<Atom[]> transformed = MultipleAlignmentTools.transformAtoms(alignment);
308232
List<Integer> lengths = new ArrayList<Integer>(alignment.size());
309233
for(Atom[] atoms : alignment.getEnsemble().getAtomArrays()) {
310234
lengths.add(atoms.length);
@@ -366,7 +290,7 @@ private static double getRefTMScore(List<Atom[]> transformed, List<Integer> leng
366290
*/
367291
public static double getCEMCScore(MultipleAlignment alignment) throws StructureException {
368292
//Transform Atoms
369-
List<Atom[]> transformed = transformAtoms(alignment);
293+
List<Atom[]> transformed = MultipleAlignmentTools.transformAtoms(alignment);
370294
//Calculate d0
371295
int minLen = Integer.MAX_VALUE;
372296
for(Atom[] atoms : alignment.getEnsemble().getAtomArrays())
@@ -375,6 +299,17 @@ public static double getCEMCScore(MultipleAlignment alignment) throws StructureE
375299
return getCEMCScore(transformed, d0);
376300
}
377301

302+
/**
303+
* Calculates the CEMC score, specific for the MultipleAlignment algorithm.
304+
* The score function is modified from the original CEMC paper, making it
305+
* continuous and differentiable.<p>
306+
* Complexity: T(n,l) = O(l*n^2), if n=number of structures and l=alignment length.
307+
*
308+
* @param transformed List of transformed Atom arrays
309+
* @param d0 parameter for the distance evaluation
310+
* @return double CEMC score
311+
* @throws StructureException
312+
*/
378313
private static double getCEMCScore(List<Atom[]> transformed, double d0) throws StructureException {
379314

380315
int size = transformed.size();

biojava-structure/src/main/java/org/biojava/nbio/structure/align/multiple/MultipleAlignmentTools.java

Lines changed: 127 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
import java.util.SortedSet;
77
import java.util.TreeSet;
88

9+
import javax.vecmath.Matrix4d;
10+
911
import org.biojava.nbio.structure.Atom;
12+
import org.biojava.nbio.structure.Calc;
1013
import org.biojava.nbio.structure.StructureTools;
1114
import org.biojava.nbio.structure.jama.Matrix;
1215

@@ -380,15 +383,131 @@ public static int getBlockForSequencePosition(MultipleAlignment multAln, List<In
380383
* Complexity: T(n,l) = O(l*n^2), if n=number of structures and l=alignment length.
381384
*
382385
* @param alignment MultipleAlignment
383-
* @return Matrix containing all average residue distances in alignmed columns
386+
* @return Matrix containing all average residue distances
384387
*/
385388
public static Matrix getAverageResidueDistances(MultipleAlignment alignment){
386-
387-
388-
389-
390-
391-
392-
return null;
389+
//Transform Atoms
390+
List<Atom[]> transformed = transformAtoms(alignment);
391+
return getAverageResidueDistances(transformed);
393392
}
393+
394+
/**
395+
* The average residue distance Matrix contains the average distance from each residue to all
396+
* other residues aligned with it. <p>
397+
* Complexity: T(n,l) = O(l*n^2), if n=number of structures and l=alignment length.
398+
*
399+
* @param transformed List of Atom arrays containing only the aligned atoms of each structure, or null if there is a gap.
400+
* @return Matrix containing all average residue distances. Entry -1 means there is a gap in the position.
401+
*/
402+
public static Matrix getAverageResidueDistances(List<Atom[]> transformed){
403+
404+
int size = transformed.size();
405+
int length = transformed.get(0).length;
406+
Matrix residueDistances = new Matrix(size,length,-1); //A residue distance is the average distance to all others
407+
408+
//Calculate the average residue distances
409+
for (int r1=0; r1<size; r1++){
410+
for(int c=0;c<transformed.get(r1).length;c++) {
411+
Atom refAtom = transformed.get(r1)[c];
412+
if(refAtom == null) continue;
413+
414+
for(int r2=r1+1;r2<size;r2++) {
415+
Atom atom = transformed.get(r2)[c];
416+
if(atom != null) {
417+
double distance = Calc.getDistance(refAtom, atom);
418+
if (residueDistances.get(r1, c) == -1) residueDistances.set(r1, c, 1+distance);
419+
else residueDistances.set(r1, c, residueDistances.get(r1, c)+distance);
420+
if (residueDistances.get(r2, c) == -1) residueDistances.set(r2, c, 1+distance);
421+
else residueDistances.set(r2, c, residueDistances.get(r2, c)+distance);
422+
}
423+
}
424+
}
425+
}
426+
for(int c=0;c<length;c++) {
427+
int nonNullRes = 0;
428+
for(int r=0;r<size;r++) {
429+
if (residueDistances.get(r, c) != -1) nonNullRes++;
430+
}
431+
for(int r=0;r<size;r++) {
432+
if (residueDistances.get(r, c) != -1) residueDistances.set(r, c, residueDistances.get(r, c)/nonNullRes);
433+
}
434+
}
435+
return residueDistances;
436+
}
437+
438+
/**
439+
* Transforms atoms according to the superposition stored in the alignment.
440+
* <p>
441+
* For each structure in the alignment, returns an atom for each
442+
* representative atom in the aligned columns, omitting unaligned residues
443+
* (i.e. an array of length <tt>alignment.length()</tt> ).
444+
* <p>
445+
* All blocks are concatenated together, so Atoms may not appear in the
446+
* same order as in their parent structure. If the alignment blocks contain
447+
* null residues (gaps), then the returned array will also contain null Atoms.
448+
*
449+
* @param alignment MultipleAlignment
450+
* @return List of Atom arrays of only the aligned atoms of every structure (null Atom if a gap position)
451+
*/
452+
public static List<Atom[]> transformAtoms(MultipleAlignment alignment) {
453+
if(alignment.getEnsemble() == null ) {
454+
throw new NullPointerException("No ensemble set for this alignment");
455+
}
456+
457+
List<Atom[]> atomArrays = alignment.getEnsemble().getAtomArrays();
458+
List<Atom[]> transformed = new ArrayList<Atom[]>(atomArrays.size());
459+
460+
//Loop through structures
461+
for (int i=0; i<atomArrays.size(); i++){
462+
463+
Matrix4d transform = null;
464+
if( alignment.getTransformations() != null) {
465+
transform = alignment.getTransformations().get(i);
466+
}
467+
Atom[] curr = atomArrays.get(i); // all CA atoms from structure
468+
469+
//Concatenated list of all blocks for this structure
470+
Atom[] transformedAtoms = new Atom[alignment.length()];
471+
int transformedAtomsLength = 0;
472+
473+
// Each blockset gets transformed independently
474+
for( BlockSet bs : alignment.getBlockSets()) {
475+
476+
Atom[] blocksetAtoms = new Atom[bs.length()];
477+
478+
for( Block blk : bs.getBlocks() ) {
479+
if( blk.size() != atomArrays.size()) {
480+
throw new IllegalStateException(String.format(
481+
"Mismatched block length. Expected %d structures, found %d.",
482+
atomArrays.size(),blk.size() ));
483+
}
484+
//Extract aligned atoms
485+
for (int j=0; j<blk.length(); j++){
486+
Integer alignedPos = blk.getAlignRes().get(i).get(j);
487+
if (alignedPos != null) {
488+
blocksetAtoms[j] = (Atom) curr[alignedPos].clone();
489+
}
490+
}
491+
}
492+
493+
// transform according to (1) the blockset matrix, or (2) the alignment matrix
494+
Matrix4d blockTrans = null;
495+
if(bs.getTransformations() != null)
496+
blockTrans = bs.getTransformations().get(i);
497+
if(blockTrans == null) {
498+
blockTrans = transform;
499+
}
500+
501+
for(Atom a : blocksetAtoms) {
502+
if (a!=null) Calc.transform(a, blockTrans);
503+
transformedAtoms[transformedAtomsLength] = a;
504+
transformedAtomsLength++;
505+
}
506+
}
507+
assert(transformedAtomsLength == alignment.length());
508+
509+
transformed.add(transformedAtoms);
510+
}
511+
return transformed;
512+
}
394513
}

0 commit comments

Comments
 (0)