Skip to content

Commit ea5b48c

Browse files
committed
an example for how to align DNA
git-svn-id: http://code.open-bio.org/repos/biojava/biojava-live/trunk@9746 7c6358e6-4a41-0410-a743-a5b2a554c398
1 parent 4ca31b9 commit ea5b48c

File tree

1 file changed

+77
-0
lines changed

1 file changed

+77
-0
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package demo;
2+
3+
import org.biojava3.alignment.Alignments;
4+
import org.biojava3.alignment.Alignments.PairwiseSequenceAlignerType;
5+
import org.biojava3.alignment.SimpleGapPenalty;
6+
import org.biojava3.alignment.SubstitutionMatrixHelper;
7+
import org.biojava3.alignment.template.GapPenalty;
8+
import org.biojava3.alignment.template.PairwiseSequenceAligner;
9+
import org.biojava3.alignment.template.SequencePair;
10+
import org.biojava3.core.sequence.DNASequence;
11+
import org.biojava3.core.sequence.compound.AmbiguityDNACompoundSet;
12+
import org.biojava3.core.sequence.compound.NucleotideCompound;
13+
14+
public class TestDNANeedlemanWunsch {
15+
public static void main(String[] args){
16+
17+
String query = "AGGATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGTGAAGCCCAGCTTGCTGGGTGGATCA" +
18+
"GTGGCGAACGGGTGAGTAACACGTGAGCAACCTGCCCCTGACTCTGGGATAAGCGCTGGAAACGGTGTCT" +
19+
"AATACTGGATATGAGCTACCACCGCATGGTGAGTGGTTGGAAAGATTTTTCGGTTGGGGATGGGCTCGCG" +
20+
"GCCTATCAGCTTGTTGGTGAGGTAATGGCTCACCAAGGCGTCGACGGGTAGCCGGCCTGAGAGGGTGACC" +
21+
"GGCCACACTGGGACTGAGACACGGCCCAGACTCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGC" +
22+
"GGAAGCCTGATGCAGCAACGCCGCGTGAGGGACGACGGCTTCGGGTTGTAAACCTCTTTTAGCAGGGAAG" +
23+
"AAGCGAGAGTGACGGTACCTGCAGAAAAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAG" +
24+
"GGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCTGTGAAAA" +
25+
"CCCGAGGCTCAACCTNNGGGCTGCAGTGGGTACGGGCAGACTAGAGTGCGGTAGGGGAGATTGGAATTCC" +
26+
"TGGTGTAGCGGTGGAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGATCTCTGGGCCGTAAC" +
27+
"TGACGCTGAGGAGCGAAAGGGTGGGGAGCAAACAGGCTTAGATACCCTGGTAGTCCACCCCGTAAACGTT" +
28+
"GGGAACTAGTTGTGGGGTCCTTTCCACGGATTCCGTGACGCACGTAACGCATTAAGTTCCCCGCCTGGGG" +
29+
"AGTACGGCCGCAAGGCTAAAACTCAAAGGAATTGACGGGGACCCGCACAAGCGGCGGAGCATGCGGATTA" +
30+
"AATCGATGCAACGCGAAGAACCTTACCAAGGCTTGACATACACGAGAACGCTGCAGAAATGTAGAACTCT" +
31+
"TTGGACACTCGTGAACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCG" +
32+
"CAACGAGCGCAACCCTCGTTCTATGTTGCCAGCACGTAATGGTGGGAACTCATGGGATACTGCCGGGGTC" +
33+
"AACTCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGTCTTGGGCTTCACGCATGCTACA" +
34+
"ATGGCCGGTACAAAGGGCTGCAATACCGTGAGGTGGAGCGAATCCCAAAAAGCCGGTCCCAGTTCGGATT" +
35+
"GAGGTCTGCAACTCGACCTCATGAAGTCGGAGTCGCTAGTAATCGCAGATCAGCAACGCTGCGGTGAATA" +
36+
"CGTTCCCGGGTCTTGTACACACCGCCCGTCAAGTCATGAAAGTCGGTAACACCTGAAGCCGGTGGCCTAA" +
37+
"CCCTTGTGGAGGGAGCCGGTAATTAAA";
38+
39+
String target = "CTGGCCGCCTGCTTAACACATCCAAGTCGAACGGTGAAGCCCCANCTTACTGGGTGGATCAGTGCCGAAC" +
40+
"GGGTGAGTAACACGTGAGCAACCTCCCCCTGACTCTGGGATAAGCGCTGGAANCGGTGTCTAATACTGGA" +
41+
"TATGAGCTACCACCGCATGGTGAGTGGTTGGAAAGATTTTTCGGTTGGGGATGGGCTCGCGCCCTATGAG" +
42+
"CTTGTTGGTGAGGTAATGGCTCACCAAGCCGTCGACGGGTAGCCGGCCTGAGAGGGTGACCGNCCACACT" +
43+
"GGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGGAAGCCT" +
44+
"GATTCANCAACCCCGCGTGAGGGACGACGGCCTTCGGGTTGTAAACCTCTTTTAGCAGGGAAGAAGCGAG" +
45+
"AGTGACGGTACCTGCAGAAAAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAA" +
46+
"GCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCTGTGAAAACCCGAGG" +
47+
"CTCAACCTCGGGCCTGCAGTGGGTACGGGCAGACTAGAGTGCGGTAGGGGAGATTGGAATTCCTGGTGTA" +
48+
"GCGGTGGAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGATCTCTGGGCCGTAACTGACGCT" +
49+
"GAGGAGCGAAAGGGTGGGGAGCAAACAGGCTTAGATACCCTGGTAGTCCACCCCGTAAACGTTGGGAACT" +
50+
"AGTTGTGGGGTCCTTTCCACGGATTCCGTGACGCAGCTAACGCATTAAGTTCCCCGCCTGGGGAGTACGG" +
51+
"CCGCAAGGCTAAAACTCAAAGGAATTGACGGGGACCCGCACAAGCGGCGGAGCATGCGGATTAATTCGAT" +
52+
"GCAACGCGAAGAACCTTACCAAGGCTTGACATACACGAGAACGCTGCAGAAATGTAGAACTCTTTGGACA" +
53+
"CTCGTGAACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAG" +
54+
"CGCAACCCTCGTTCTATGTTGCCAGCACGTAATGGTGGGAACTCATGGGATACTGCCGGGGTCAACTCGG" +
55+
"AGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGTCTTGGGCTTCACGCATGCTACAATGGCCG" +
56+
"GTACAAAGGGCTGCAATACCGTGAGGTGGAGCGAATCCCAAAAAGCCGGTCCCAGTTCGGATTGAGGTCT" +
57+
"GCAACTCGACCTCATGAAGTCGGAGTCGCTAGTAATCGCAGATCAGCAACGCTGCGGTGAATACGTTCCC" +
58+
"GGGTCTTGTACACACCGCCCGTCAAGTCATGAAAGTCGGTAACACCTGAAGCCGGTGGCCCAACCCTTGT" +
59+
"GGAGGGAGCCGTCGAAGGTGGGATCGGTAATTAGGACTAAGTCGTAACAAGGTAGCCGTACC";
60+
61+
GapPenalty penalty = new SimpleGapPenalty((short)-14, (short)-4);
62+
PairwiseSequenceAligner<DNASequence, NucleotideCompound> aligner = Alignments.getPairwiseAligner(
63+
new DNASequence(query, AmbiguityDNACompoundSet.getDNACompoundSet()),
64+
new DNASequence(target, AmbiguityDNACompoundSet.getDNACompoundSet()),
65+
PairwiseSequenceAlignerType.GLOBAL,
66+
penalty, SubstitutionMatrixHelper.getNuc4_4());
67+
SequencePair<DNASequence, NucleotideCompound>
68+
alignment = aligner.getPair();
69+
70+
System.out.println(alignment);
71+
72+
int identical = alignment.getNumIdenticals();
73+
System.out.println("Number of identical residues: " + identical);
74+
System.out.println("% identical query: " + identical / (float) query.length() );
75+
System.out.println("% identical target: " + identical / (float) target.length() );
76+
}
77+
}

0 commit comments

Comments
 (0)