Skip to content

Commit 634cd7b

Browse files
committed
Improvements in CEMC score and algorithm
Text output also has been improved in MultipleAlignmentTools
1 parent ecc3133 commit 634cd7b

File tree

10 files changed

+364
-89
lines changed

10 files changed

+364
-89
lines changed

biojava-structure-gui/src/main/java/demo/DemoCEMC.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,21 +38,23 @@ public static void main(String[] args) throws IOException, StructureException, I
3838
//Calcium Binding (MUSTA paper)
3939
//List<String> names = Arrays.asList("4cpv", "2scp.A", "2sas", "1top", "1scm.B", "3icb");
4040
//Serine Rich Proteins SERP (MUSTA paper)
41-
//List<String> names = Arrays.asList("7api.A", "8api.A", "1hle.A", "1ova.A", "2ach.A", "9api.A", "1psi", "1atu", "1kct", "1ath.A", "1att.A");
41+
List<String> names = Arrays.asList("7api.A", "8api.A", "1hle.A", "1ova.A", "2ach.A", "9api.A", "1psi", "1atu", "1kct", "1ath.A", "1att.A");
4242
//Serine Proteases (MUSTA paper)
4343
//List<String> names = Arrays.asList("1cse.E", "1sbn.E", "1pek.E", "3prk", "3tec.E");
4444
//GPCRs
4545
//List<String> names = Arrays.asList("2z73.A", "1u19.A", "4ug2.A", "4xt3", "4or2.A", "3odu.A");
4646
//Immunoglobulins (MAMMOTH paper)
4747
//List<String> names = Arrays.asList("2hla.B", "3hla.B", "1cd8", "2rhe", "1tlk", "1ten", "1ttf");
4848
//Globins (MAMMOTH and MUSTA papers)
49-
List<String> names = Arrays.asList("1mbc", "1hlb", "1thb.A", "1ith.A", "1idr.A", "1dlw", "1kr7.A", "1ew6.A", "1it2.A", "1eco", "3sdh.A", "1cg5.B", "1fhj.B", "1ird.A", "1mba", "2gdm", "1b0b", "1h97.A", "1ash.A", "1jl7.A");
49+
//List<String> names = Arrays.asList("1mbc", "1hlb", "1thb.A", "1ith.A", "1idr.A", "1dlw", "1kr7.A", "1ew6.A", "1it2.A", "1eco", "3sdh.A", "1cg5.B", "1fhj.B", "1ird.A", "1mba", "2gdm", "1b0b", "1h97.A", "1ash.A", "1jl7.A");
5050
//Rossman-Fold (POSA paper)
5151
//List<String> names = Arrays.asList("d1heta2", "d1ek6a_", "d1obfo1", "2cmd", "d1np3a2", "d1bgva1", "d1id1a_", "d1id1a_", "d1oi7a1");
5252
//Circular Permutations (Bliven CECP paper) - dynamin GTP-ase with CP G-domain
5353
//List<String> names = Arrays.asList("d1u0la2", "d1jwyb_");
5454
//Circular Permutations: SAND and MFPT domains
5555
//List<String> names = Arrays.asList("d2bjqa1", "d1h5pa_", "d1ufna_"); //"d1oqja"
56+
//Ankyrin Repeats
57+
//List<String> names = Arrays.asList("d1n0ra_", "3ehq.A", "1awc.B"); //ankyrin
5658

5759
//Load the CA atoms of the structures
5860
AtomCache cache = new AtomCache();

biojava-structure-gui/src/main/java/demo/DemoMultipleAlignmentJmol.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public static void main(String[] args) throws IOException, StructureException {
5151
//MultipleAlignmentEnsemble ensemble = new MultipleAlignmentEnsembleImpl(afpChain, atomArrays.get(0),atomArrays.get(1));
5252
//MultipleAlignment pairwise = ensemble.getMultipleAlignments().get(0);
5353

54-
//System.out.println(MultipleAlignmentWriter.toFASTA(fakeMultAln));
54+
System.out.println(MultipleAlignmentWriter.toFASTA(fakeMultAln));
5555
MultipleAlignmentDisplay.display(fakeMultAln);
5656
//StructureAlignmentDisplay.display(pairwise);
5757
//For comparison display the original AFP
@@ -82,9 +82,11 @@ private static MultipleAlignment fakeMultipleAlignment(String family, List<Atom[
8282
BlockSet blockSet2 = new BlockSetImpl(fakeMultAln); //second BlockSet with 1 Block
8383

8484
Block block1 = new BlockImpl(blockSet1);
85+
block1.setAlignRes(new ArrayList<List<Integer>>());
8586
Block block2 = new BlockImpl(blockSet1);
86-
87+
block2.setAlignRes(new ArrayList<List<Integer>>());
8788
Block block3 = new BlockImpl(blockSet2);
89+
block3.setAlignRes(new ArrayList<List<Integer>>());
8890

8991
//Alignment obtained from MUSTANG multiple alignment (just some of the residues, not the whole alignment)
9092
List<Integer> aligned11 = Arrays.asList(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21);

biojava-structure-gui/src/main/java/org/biojava/nbio/structure/align/gui/MenuCreator.java

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ public class MenuCreator {
6262
public static final String FASTA_FORMAT = "View FASTA Alignment";
6363
public static final String DIST_MATRICES = "Show Distance Matrices";
6464
public static final String DOT_PLOT = "Show Dot Plot";
65+
public static final String PAIRWISE_ALIGN = "New Pairwise Alignment";
66+
public static final String MULTIPLE_ALIGN = "New Multiple Alignment";
6567
protected static final int keyMask = Toolkit.getDefaultToolkit().getMenuShortcutKeyMask();
6668

6769
/**
@@ -669,23 +671,46 @@ protected static JMenuItem getPairwiseAlignmentMenuItem() {
669671

670672
JMenuItem pairI ;
671673
if ( alignIcon == null)
672-
pairI = new JMenuItem("New Alignment");
674+
pairI = new JMenuItem(PAIRWISE_ALIGN);
673675
else
674-
pairI = new JMenuItem("New Alignment", alignIcon);
676+
pairI = new JMenuItem(PAIRWISE_ALIGN, alignIcon);
675677
pairI.setMnemonic(KeyEvent.VK_N);
676678
pairI.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_N, keyMask));
677679
pairI.addActionListener(new ActionListener() {
678680
@Override
679681
public void actionPerformed(ActionEvent e) {
680682
String cmd = e.getActionCommand();
681683

682-
if ( cmd.equals("New Alignment")){
684+
if ( cmd.equals(PAIRWISE_ALIGN)){
683685
MenuCreator.showPairDialog();
684686
}
685687
}
686688
});
687689
return pairI;
688690
}
691+
692+
protected static JMenuItem getMultipleAlignmentMenuItem() {
693+
ImageIcon alignIcon = createImageIcon("/icons/window_new.png");
694+
695+
JMenuItem multipleI ;
696+
if ( alignIcon == null)
697+
multipleI = new JMenuItem(MULTIPLE_ALIGN);
698+
else
699+
multipleI = new JMenuItem(MULTIPLE_ALIGN, alignIcon);
700+
multipleI.setMnemonic(KeyEvent.VK_N);
701+
multipleI.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_N, keyMask));
702+
multipleI.addActionListener(new ActionListener() {
703+
@Override
704+
public void actionPerformed(ActionEvent e) {
705+
String cmd = e.getActionCommand();
706+
707+
if ( cmd.equals(MULTIPLE_ALIGN)){
708+
MenuCreator.showPairDialog();
709+
}
710+
}
711+
});
712+
return multipleI;
713+
}
689714

690715
/**
691716
* Creates a frame to display a DotPlotPanel.

biojava-structure-gui/src/main/java/org/biojava/nbio/structure/align/gui/aligpanel/MultipleAligPanel.java

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,8 @@ public void paintComponent(Graphics g){
230230
for (int st=0; st<size-1; st++){
231231
if (alnSeq.get(st).charAt(i) != '-') c1 = alnSeq.get(st).charAt(i);
232232
char c2 = alnSeq.get(st+1).charAt(i);
233-
if (c1=='-' || c2=='-') continue; //If any position is a gap continue, not comparable
233+
//If any position is a gap continue, not comparable
234+
if (c1=='-' || c2=='-' || Character.isLowerCase(c1) || Character.isLowerCase(c2)) continue;
234235
if (equal && c1 == c2) continue;
235236
else equal = false;
236237
if (AFPAlignmentDisplay.aaScore(c1, c2) > 0) continue;
@@ -242,7 +243,7 @@ public void paintComponent(Graphics g){
242243
}
243244
//Color by alignment block the same way as in the Jmol is done (darkening colors)
244245
else if (colorByAlignmentBlock){
245-
int blockNr = MultipleAlignmentTools.getBlockForAligPos(multAln,mapSeqToStruct,i);
246+
int blockNr = MultipleAlignmentTools.getBlockForSequencePosition(multAln,mapSeqToStruct,i);
246247
bg = jmol.getColorPalette().getColorPalette(multAln.getBlocks().size())[blockNr];
247248
}
248249
if (isSelected(i)) bg = Color.YELLOW;
@@ -260,24 +261,33 @@ else if (colorByAlignmentBlock){
260261
}
261262
}
262263

263-
int nrLines = length / MultipleAlignmentCoordManager.DEFAULT_LINE_LENGTH;
264+
int nrLines = (length-1) / (MultipleAlignmentCoordManager.DEFAULT_LINE_LENGTH);
264265

265266
for (int i = 0 ; i < nrLines+1 ; i++){
266267

267268
// draw legend at i
268269
for (int str=0; str<size; str++){
270+
269271
Point p1 = coordManager.getLegendPosition(i,str);
270272

271273
int aligPos = i * MultipleAlignmentCoordManager.DEFAULT_LINE_LENGTH;
272-
Atom a1 = MultipleAlignmentTools.getAtomForAligPos(multAln, mapSeqToStruct, str, aligPos);
274+
Atom a1 = null;
275+
while (a1==null && aligPos < Math.min((i+1)*MultipleAlignmentCoordManager.DEFAULT_LINE_LENGTH-1,length)){
276+
a1 = MultipleAlignmentTools.getAtomForSequencePosition(multAln, mapSeqToStruct, str, aligPos);
277+
aligPos++;
278+
}
273279
String label1 = JmolTools.getPdbInfo(a1,false);
274280
g2D.drawString(label1, p1.x,p1.y);
275281

276282
Point p3 = coordManager.getEndLegendPosition(i,str);
277-
278-
aligPos = i * MultipleAlignmentCoordManager.DEFAULT_LINE_LENGTH + MultipleAlignmentCoordManager.DEFAULT_LINE_LENGTH -1 ;
283+
284+
aligPos = (i*MultipleAlignmentCoordManager.DEFAULT_LINE_LENGTH + MultipleAlignmentCoordManager.DEFAULT_LINE_LENGTH - 1);
279285
if (aligPos > length) aligPos = length-1;
280-
Atom a3 = MultipleAlignmentTools.getAtomForAligPos(multAln, mapSeqToStruct, str, aligPos);
286+
Atom a3 = null;
287+
while (a3==null && aligPos > Math.max(i*MultipleAlignmentCoordManager.DEFAULT_LINE_LENGTH,0)){
288+
a3 = MultipleAlignmentTools.getAtomForSequencePosition(multAln, mapSeqToStruct, str, aligPos);
289+
aligPos--;
290+
}
281291

282292
String label3 = JmolTools.getPdbInfo(a3,false);
283293

@@ -309,7 +319,7 @@ private void updateJmolDisplay() {
309319
for (int i=0; i<length; i++){
310320
if (selection.get(i)){
311321
for (int str=0; str<size; str++){
312-
Atom a = MultipleAlignmentTools.getAtomForAligPos(multAln, mapSeqToStruct,str,i);
322+
Atom a = MultipleAlignmentTools.getAtomForSequencePosition(multAln, mapSeqToStruct,str,i);
313323
if (a != null) {
314324
cmd.append(JmolTools.getPdbInfo(a));
315325
cmd.append("/"+(str+1)+", ");

biojava-structure-gui/src/main/java/org/biojava/nbio/structure/align/gui/aligpanel/MultipleStatusDisplay.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ public void mouseOverPosition(AlignedPosition p) {
7575
String alnseq = panel.getAlnSequences().get(str);
7676
char c = alnseq.charAt(p.getPos1());
7777

78-
Atom a = MultipleAlignmentTools.getAtomForAligPos(panel.getMultipleAlignment(), panel.getMapSeqToStruct(), str, p.getPos1());
78+
Atom a = MultipleAlignmentTools.getAtomForSequencePosition(panel.getMultipleAlignment(), panel.getMapSeqToStruct(), str, p.getPos1());
7979
String pdbInfo = JmolTools.getPdbInfo(a);
8080
msg += ": "+pdbInfo + " ("+c+") ";
8181
}
@@ -104,7 +104,7 @@ public void toggleSelection(AlignedPosition p) {
104104
String alnseq = panel.getAlnSequences().get(str);
105105
char c = alnseq.charAt(p.getPos1());
106106

107-
Atom a = MultipleAlignmentTools.getAtomForAligPos(panel.getMultipleAlignment(), panel.getMapSeqToStruct(), str, p.getPos1());
107+
Atom a = MultipleAlignmentTools.getAtomForSequencePosition(panel.getMultipleAlignment(), panel.getMapSeqToStruct(), str, p.getPos1());
108108
String pdbInfo = JmolTools.getPdbInfo(a);
109109

110110
msg += ": "+pdbInfo + " ("+c+") ";
@@ -127,8 +127,8 @@ public void rangeSelected(AlignedPosition start, AlignedPosition end) {
127127
char c1 = alnseq.charAt(start.getPos1());
128128
char c2 = alnseq.charAt(end.getPos1());
129129

130-
Atom a1 = MultipleAlignmentTools.getAtomForAligPos(panel.getMultipleAlignment(), panel.getMapSeqToStruct(), str, start.getPos1());
131-
Atom a2 = MultipleAlignmentTools.getAtomForAligPos(panel.getMultipleAlignment(), panel.getMapSeqToStruct(), str, end.getPos1());
130+
Atom a1 = MultipleAlignmentTools.getAtomForSequencePosition(panel.getMultipleAlignment(), panel.getMapSeqToStruct(), str, start.getPos1());
131+
Atom a2 = MultipleAlignmentTools.getAtomForSequencePosition(panel.getMultipleAlignment(), panel.getMapSeqToStruct(), str, end.getPos1());
132132

133133
String pdbInfo1 = JmolTools.getPdbInfo(a1);
134134
String pdbInfo2 = JmolTools.getPdbInfo(a2);

biojava-structure/src/main/java/org/biojava/nbio/structure/align/cemc/CeMcMain.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ public MultipleAlignment align(List<Atom[]> atomArrays, Object params) throws St
252252
List<Future<MultipleAlignment>> afpFuture = new ArrayList<Future<MultipleAlignment>>();
253253
int seed = 0;
254254

255-
//Repeat the optimization 10 times in parallel, to obtain a more robust result.
255+
//Repeat the optimization in parallel, to obtain a more robust result.
256256
for (int i=0; i<1; i++){
257257
Callable<MultipleAlignment> worker = new CeMcOptimizer(result, seed+i,reference);
258258
Future<MultipleAlignment> submit = executor.submit(worker);

biojava-structure/src/main/java/org/biojava/nbio/structure/align/cemc/CeMcOptimizer.java

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,11 @@ public class CeMcOptimizer implements Callable<MultipleAlignment> {
3939
private MultipleSuperimposer imposer;;
4040

4141
//Optimization parameters
42-
private static final int Rmin = 2; //Minimum number of aligned structures without a gap
42+
private static int Rmin = 2; //Minimum number of aligned structures without a gap (33% of initial)
4343
private static final int Lmin = 15; //Minimum alignment length of a Block
4444
private static final int iterFactor = 100; //Factor to control the max number of iterations of optimization
4545
private static final double C = 20; //Probability function constant (probability of acceptance for bad moves)
4646

47-
//Score function parameters
48-
private static final double M = 20.0; //Maximum score of a column
49-
private static final double G = 10.0; //Gap penalty for partial gaps in the alignment
50-
5147
//Alignment Information
5248
private MultipleAlignment msa; //alignment - seed and return type
5349
private int size; //number of structures in the alignment
@@ -101,6 +97,7 @@ private void initialize(MultipleAlignment seed) throws StructureException {
10197
//Initialize member variables
10298
msa = seed.clone();
10399
size = seed.size();
100+
Rmin = Math.max(size/3,2);
104101
atomArrays = msa.getEnsemble().getAtomArrays();
105102
structureLengths = new ArrayList<Integer>();
106103
for (int i=0; i<size; i++) structureLengths.add(atomArrays.get(i).length);
@@ -159,7 +156,7 @@ private void optimizeMC(int maxIter) throws StructureException {
159156
scoreHistory = new ArrayList<Double>();
160157

161158
int conv = 0; //Number of steps without an alignment improvement
162-
int stepsToConverge = maxIter/50;
159+
int stepsToConverge =maxIter/20;
163160
int i = 1;
164161

165162
while (i<maxIter && conv<stepsToConverge){
@@ -175,12 +172,10 @@ private void optimizeMC(int maxIter) throws StructureException {
175172
double lastScore = mcScore;
176173

177174
boolean moved = false;
178-
int options = 3;
179-
if (conv > stepsToConverge/2) options = 4; //Only allow gap insertion when the convergence is approaching
180175

181176
while (!moved){
182177
//Randomly select one of the steps to modify the alignment
183-
int move = rnd.nextInt(options);
178+
int move = rnd.nextInt(4);
184179
switch (move){
185180
case 0: moved = shiftRow();
186181
if (debug) System.out.println("did shift");
@@ -469,7 +464,7 @@ else if (currentBlock.getAlignRes().get(str).get(rightRes) == null){
469464

470465
/**
471466
* It extends the alignment one position to the right or to the left of a randomly selected position
472-
* by moving the consecutive residues of each subunit (if present) from the freePool to the block.
467+
* by moving the consecutive residues of each subunit (if enough) from the freePool to the block.
473468
* <p>
474469
* If there are not enough residues in the freePool it introduces gaps.
475470
*/
@@ -486,6 +481,21 @@ private boolean expandBlock(){
486481
case 0:
487482

488483
int rightBoundary = res;
484+
int[] previousPos = new int[size];
485+
for (int str=0; str<size; str++) previousPos[str] = -1;
486+
487+
//Search a position to the right that has at minimum Rmin non consecutive residues (otherwise not enough freePool residues to expand)
488+
while (currentBlock.length()-1>rightBoundary){
489+
int noncontinuous = 0;
490+
for (int str=0; str<size; str++){
491+
if (currentBlock.getAlignRes().get(str).get(rightBoundary) == null) continue;
492+
else if (previousPos[str] == -1) previousPos[str] = currentBlock.getAlignRes().get(str).get(rightBoundary);
493+
else if (currentBlock.getAlignRes().get(str).get(rightBoundary) > previousPos[str]+1) noncontinuous++;
494+
}
495+
if (noncontinuous < Rmin) rightBoundary++;
496+
else break;
497+
}
498+
rightBoundary--;
489499

490500
//Expand the block with the residues at the subunit boundaries
491501
for (int str=0; str<size; str++){
@@ -510,6 +520,20 @@ private boolean expandBlock(){
510520
case 1:
511521

512522
int leftBoundary = res;
523+
int[] nextPos = new int[size];
524+
for (int str=0; str<size; str++) nextPos[str] = -1;
525+
526+
//Search a position to the right that has at minimum Rmin non consecutive residues (otherwise not enough freePool residues to expand)
527+
while (leftBoundary>0){
528+
int noncontinuous = 0;
529+
for (int str=0; str<size; str++){
530+
if (currentBlock.getAlignRes().get(str).get(leftBoundary) == null) continue;
531+
else if (nextPos[str] == -1) nextPos[str] = currentBlock.getAlignRes().get(str).get(leftBoundary);
532+
else if (currentBlock.getAlignRes().get(str).get(leftBoundary) < nextPos[str]-1) noncontinuous++;
533+
}
534+
if (noncontinuous < Rmin) leftBoundary--;
535+
else break;
536+
}
513537

514538
//Expand the block with the residues at the subunit boundaries
515539
for (int str=0; str<size; str++){

0 commit comments

Comments
 (0)