|
39 | 39 | */ |
40 | 40 | public class CaseFreeAminoAcidCompoundSet implements CompoundSet<AminoAcidCompound> { |
41 | 41 |
|
42 | | - private final Map<String, AminoAcidCompound> aminoAcidCompoundCache = new HashMap<String, AminoAcidCompound>(); |
43 | | - private final Map<AminoAcidCompound, Set<AminoAcidCompound>> equivalentsCache = |
44 | | - new HashMap<AminoAcidCompound, Set<AminoAcidCompound>>(); |
45 | | - |
46 | | - public CaseFreeAminoAcidCompoundSet() { |
47 | | - aminoAcidCompoundCache.put("A", new AminoAcidCompound(null, "A", "Ala", "Alanine", 71.0788f)); |
48 | | - aminoAcidCompoundCache.put("R", new AminoAcidCompound(null, "R", "Arg", "Arginine", 156.1875f)); |
49 | | - aminoAcidCompoundCache.put("N", new AminoAcidCompound(null, "N", "Asn", "Asparagine", 114.1039f)); |
50 | | - aminoAcidCompoundCache.put("D", new AminoAcidCompound(null, "D", "Asp", "Aspartic acid", 115.0886f)); |
51 | | - aminoAcidCompoundCache.put("C", new AminoAcidCompound(null, "C", "Cys", "Cysteine", 103.1388f)); |
52 | | - aminoAcidCompoundCache.put("E", new AminoAcidCompound(null, "E", "Glu", "Glutamic acid", 129.1155f)); |
53 | | - aminoAcidCompoundCache.put("Q", new AminoAcidCompound(null, "Q", "Gln", "Glutamine", 128.1307f)); |
54 | | - aminoAcidCompoundCache.put("G", new AminoAcidCompound(null, "G", "Gly", "Glycine", 57.0519f)); |
55 | | - aminoAcidCompoundCache.put("H", new AminoAcidCompound(null, "H", "His", "Histidine", 137.1411f)); |
56 | | - aminoAcidCompoundCache.put("I", new AminoAcidCompound(null, "I", "Ile", "Isoleucine", 113.1594f)); |
57 | | - aminoAcidCompoundCache.put("L", new AminoAcidCompound(null, "L", "Leu", "Leucine", 113.1594f)); |
58 | | - aminoAcidCompoundCache.put("K", new AminoAcidCompound(null, "K", "Lys", "Lysine", 128.1741f)); |
59 | | - aminoAcidCompoundCache.put("M", new AminoAcidCompound(null, "M", "Met", "Methionine", 131.1986f)); |
60 | | - aminoAcidCompoundCache.put("F", new AminoAcidCompound(null, "F", "Phe", "Phenylalanine", 147.1766f)); |
61 | | - aminoAcidCompoundCache.put("P", new AminoAcidCompound(null, "P", "Pro", "Proline", 97.1167f)); |
62 | | - aminoAcidCompoundCache.put("S", new AminoAcidCompound(null, "S", "Ser", "Serine", 87.0782f)); |
63 | | - aminoAcidCompoundCache.put("T", new AminoAcidCompound(null, "T", "Thr", "Threonine", 101.1051f)); |
64 | | - aminoAcidCompoundCache.put("W", new AminoAcidCompound(null, "W", "Trp", "Tryptophan", 186.2132f)); |
65 | | - aminoAcidCompoundCache.put("Y", new AminoAcidCompound(null, "Y", "Tyr", "Tyrosine", 163.1760f)); |
66 | | - aminoAcidCompoundCache.put("V", new AminoAcidCompound(null, "V", "Val", "Valine", 99.1326f)); |
67 | | - aminoAcidCompoundCache.put("B", new AminoAcidCompound(null, "B", "Asx", "Asparagine or Aspartic acid", null)); |
68 | | - aminoAcidCompoundCache.put("Z", new AminoAcidCompound(null, "Z", "Glx", "Glutamine or Glutamic acid", null)); |
69 | | - aminoAcidCompoundCache.put("J", new AminoAcidCompound(null, "J", "Xle", "Leucine or Isoleucine", null)); |
70 | | - aminoAcidCompoundCache.put("X", new AminoAcidCompound(null, "X", "Xaa", "Unspecified", null)); |
71 | | - aminoAcidCompoundCache.put("-", new AminoAcidCompound(null, "-", "---", "Unspecified", null)); |
72 | | - aminoAcidCompoundCache.put(".", new AminoAcidCompound(null, ".", "...", "Unspecified", null)); |
73 | | - aminoAcidCompoundCache.put("_", new AminoAcidCompound(null, "_", "___", "Unspecified", null)); |
74 | | - aminoAcidCompoundCache.put("*", new AminoAcidCompound(null, "*", "***", "Stop", null)); |
75 | | - |
76 | | - //Selenocysteine - this is encoded by UGA with the presence |
77 | | - //of a SECIS element (SElenoCysteine Insertion Sequence) in the mRNA |
78 | | - //and is a post-translation modification |
79 | | - aminoAcidCompoundCache.put("U", new AminoAcidCompound(null, "U", "Sec", "Selenocysteine", 150.0388f)); |
80 | | - |
81 | | - //Pyrrolysine is encoded by UAG in mRNA (normally Amber stop codon) which is translated to |
82 | | - //this amino acid under the presence of pylT which creates an anti-codon CUA & pylS |
83 | | - //which then does the actual conversion to Pyl. |
84 | | - aminoAcidCompoundCache.put("O", new AminoAcidCompound(null, "O", "Pyl", "Pyrrolysine", 255.3172f)); |
85 | | - |
86 | | - Map<String, AminoAcidCompound> lowerCaseSet = new HashMap<String, AminoAcidCompound>(); |
87 | | - for(String s:this.aminoAcidCompoundCache.keySet()){ |
88 | | - lowerCaseSet.put(s.toLowerCase(), this.aminoAcidCompoundCache.get(s)); |
89 | | - } |
90 | | - this.aminoAcidCompoundCache.putAll(lowerCaseSet); |
91 | | - } |
92 | | - |
93 | | - @Override |
| 42 | + private final Map<String, AminoAcidCompound> aminoAcidCompoundCache = new HashMap<String, AminoAcidCompound>(); |
| 43 | + private final Map<AminoAcidCompound, Set<AminoAcidCompound>> equivalentsCache = |
| 44 | + new HashMap<AminoAcidCompound, Set<AminoAcidCompound>>(); |
| 45 | + |
| 46 | + public CaseFreeAminoAcidCompoundSet() { |
| 47 | + aminoAcidCompoundCache.put("A", new AminoAcidCompound(null, "A", "Ala", "Alanine", 71.0788f)); |
| 48 | + aminoAcidCompoundCache.put("R", new AminoAcidCompound(null, "R", "Arg", "Arginine", 156.1875f)); |
| 49 | + aminoAcidCompoundCache.put("N", new AminoAcidCompound(null, "N", "Asn", "Asparagine", 114.1039f)); |
| 50 | + aminoAcidCompoundCache.put("D", new AminoAcidCompound(null, "D", "Asp", "Aspartic acid", 115.0886f)); |
| 51 | + aminoAcidCompoundCache.put("C", new AminoAcidCompound(null, "C", "Cys", "Cysteine", 103.1388f)); |
| 52 | + aminoAcidCompoundCache.put("E", new AminoAcidCompound(null, "E", "Glu", "Glutamic acid", 129.1155f)); |
| 53 | + aminoAcidCompoundCache.put("Q", new AminoAcidCompound(null, "Q", "Gln", "Glutamine", 128.1307f)); |
| 54 | + aminoAcidCompoundCache.put("G", new AminoAcidCompound(null, "G", "Gly", "Glycine", 57.0519f)); |
| 55 | + aminoAcidCompoundCache.put("H", new AminoAcidCompound(null, "H", "His", "Histidine", 137.1411f)); |
| 56 | + aminoAcidCompoundCache.put("I", new AminoAcidCompound(null, "I", "Ile", "Isoleucine", 113.1594f)); |
| 57 | + aminoAcidCompoundCache.put("L", new AminoAcidCompound(null, "L", "Leu", "Leucine", 113.1594f)); |
| 58 | + aminoAcidCompoundCache.put("K", new AminoAcidCompound(null, "K", "Lys", "Lysine", 128.1741f)); |
| 59 | + aminoAcidCompoundCache.put("M", new AminoAcidCompound(null, "M", "Met", "Methionine", 131.1986f)); |
| 60 | + aminoAcidCompoundCache.put("F", new AminoAcidCompound(null, "F", "Phe", "Phenylalanine", 147.1766f)); |
| 61 | + aminoAcidCompoundCache.put("P", new AminoAcidCompound(null, "P", "Pro", "Proline", 97.1167f)); |
| 62 | + aminoAcidCompoundCache.put("S", new AminoAcidCompound(null, "S", "Ser", "Serine", 87.0782f)); |
| 63 | + aminoAcidCompoundCache.put("T", new AminoAcidCompound(null, "T", "Thr", "Threonine", 101.1051f)); |
| 64 | + aminoAcidCompoundCache.put("W", new AminoAcidCompound(null, "W", "Trp", "Tryptophan", 186.2132f)); |
| 65 | + aminoAcidCompoundCache.put("Y", new AminoAcidCompound(null, "Y", "Tyr", "Tyrosine", 163.1760f)); |
| 66 | + aminoAcidCompoundCache.put("V", new AminoAcidCompound(null, "V", "Val", "Valine", 99.1326f)); |
| 67 | + aminoAcidCompoundCache.put("B", new AminoAcidCompound(null, "B", "Asx", "Asparagine or Aspartic acid", null)); |
| 68 | + aminoAcidCompoundCache.put("Z", new AminoAcidCompound(null, "Z", "Glx", "Glutamine or Glutamic acid", null)); |
| 69 | + aminoAcidCompoundCache.put("J", new AminoAcidCompound(null, "J", "Xle", "Leucine or Isoleucine", null)); |
| 70 | + aminoAcidCompoundCache.put("X", new AminoAcidCompound(null, "X", "Xaa", "Unspecified", null)); |
| 71 | + aminoAcidCompoundCache.put("-", new AminoAcidCompound(null, "-", "---", "Unspecified", null)); |
| 72 | + aminoAcidCompoundCache.put(".", new AminoAcidCompound(null, ".", "...", "Unspecified", null)); |
| 73 | + aminoAcidCompoundCache.put("_", new AminoAcidCompound(null, "_", "___", "Unspecified", null)); |
| 74 | + aminoAcidCompoundCache.put("*", new AminoAcidCompound(null, "*", "***", "Stop", null)); |
| 75 | + |
| 76 | + //Selenocysteine - this is encoded by UGA with the presence |
| 77 | + //of a SECIS element (SElenoCysteine Insertion Sequence) in the mRNA |
| 78 | + //and is a post-translation modification |
| 79 | + aminoAcidCompoundCache.put("U", new AminoAcidCompound(null, "U", "Sec", "Selenocysteine", 150.0388f)); |
| 80 | + |
| 81 | + //Pyrrolysine is encoded by UAG in mRNA (normally Amber stop codon) which is translated to |
| 82 | + //this amino acid under the presence of pylT which creates an anti-codon CUA & pylS |
| 83 | + //which then does the actual conversion to Pyl. |
| 84 | + aminoAcidCompoundCache.put("O", new AminoAcidCompound(null, "O", "Pyl", "Pyrrolysine", 255.3172f)); |
| 85 | + |
| 86 | + Map<String, AminoAcidCompound> lowerCaseSet = new HashMap<String, AminoAcidCompound>(); |
| 87 | + for(String s:this.aminoAcidCompoundCache.keySet()){ |
| 88 | + lowerCaseSet.put(s.toLowerCase(), this.aminoAcidCompoundCache.get(s)); |
| 89 | + } |
| 90 | + this.aminoAcidCompoundCache.putAll(lowerCaseSet); |
| 91 | + } |
| 92 | + |
| 93 | + @Override |
94 | 94 | public String getStringForCompound(AminoAcidCompound compound) { |
95 | | - return compound.toString(); |
96 | | - } |
| 95 | + return compound.toString(); |
| 96 | + } |
97 | 97 |
|
98 | | - @Override |
| 98 | + @Override |
99 | 99 | public AminoAcidCompound getCompoundForString(String string) { |
100 | | - if (string.length() == 0) { |
101 | | - return null; |
102 | | - } |
103 | | - if (string.length() > this.getMaxSingleCompoundStringLength()) { |
104 | | - throw new IllegalArgumentException("String supplied ("+string+") is too long. Max is "+getMaxSingleCompoundStringLength()); |
105 | | - } |
106 | | - return this.aminoAcidCompoundCache.get(string); |
107 | | - } |
108 | | - |
109 | | - @Override |
| 100 | + if (string.length() == 0) { |
| 101 | + return null; |
| 102 | + } |
| 103 | + if (string.length() > this.getMaxSingleCompoundStringLength()) { |
| 104 | + throw new IllegalArgumentException("String supplied ("+string+") is too long. Max is "+getMaxSingleCompoundStringLength()); |
| 105 | + } |
| 106 | + return this.aminoAcidCompoundCache.get(string); |
| 107 | + } |
| 108 | + |
| 109 | + @Override |
110 | 110 | public int getMaxSingleCompoundStringLength() { |
111 | | - return 1; |
112 | | - } |
| 111 | + return 1; |
| 112 | + } |
113 | 113 |
|
114 | 114 |
|
115 | | - @Override |
| 115 | + @Override |
116 | 116 | public boolean isCompoundStringLengthEqual() { |
117 | | - return true; |
118 | | - } |
| 117 | + return true; |
| 118 | + } |
119 | 119 |
|
120 | | - private final static CaseFreeAminoAcidCompoundSet aminoAcidCompoundSet = new CaseFreeAminoAcidCompoundSet(); |
| 120 | + private final static CaseFreeAminoAcidCompoundSet aminoAcidCompoundSet = new CaseFreeAminoAcidCompoundSet(); |
121 | 121 |
|
122 | | - public static CaseFreeAminoAcidCompoundSet getAminoAcidCompoundSet() { |
123 | | - return aminoAcidCompoundSet; |
124 | | - } |
| 122 | + public static CaseFreeAminoAcidCompoundSet getAminoAcidCompoundSet() { |
| 123 | + return aminoAcidCompoundSet; |
| 124 | + } |
125 | 125 |
|
126 | | - @Override |
| 126 | + @Override |
127 | 127 | public boolean compoundsEquivalent(AminoAcidCompound compoundOne, AminoAcidCompound compoundTwo) { |
128 | | - Set<AminoAcidCompound> equivalents = getEquivalentCompounds(compoundOne); |
129 | | - return (equivalents != null) && equivalents.contains(compoundTwo); |
130 | | - } |
| 128 | + Set<AminoAcidCompound> equivalents = getEquivalentCompounds(compoundOne); |
| 129 | + return (equivalents != null) && equivalents.contains(compoundTwo); |
| 130 | + } |
131 | 131 |
|
132 | | - @Override |
| 132 | + @Override |
133 | 133 | public Set<AminoAcidCompound> getEquivalentCompounds(AminoAcidCompound compound) { |
134 | | - if (equivalentsCache.isEmpty()) { |
135 | | - // most compounds are equivalent to themselves alone |
136 | | - for (AminoAcidCompound c : aminoAcidCompoundCache.values()) { |
137 | | - equivalentsCache.put(c, Collections.singleton(c)); |
138 | | - } |
139 | | - // ambiguous Asparagine or Aspartic acid |
140 | | - addAmbiguousEquivalents("N", "D", "B"); |
141 | | - // ambiguous Glutamine or Glutamic acid |
142 | | - addAmbiguousEquivalents("E", "Q", "Z"); |
143 | | - // ambiguous Leucine or Isoleucine |
144 | | - addAmbiguousEquivalents("I", "L", "J"); |
145 | | - // ambiguous gaps |
146 | | - AminoAcidCompound gap1, gap2, gap3; |
147 | | - Set<AminoAcidCompound> gaps = new HashSet<AminoAcidCompound>(); |
148 | | - gaps.add(gap1 = aminoAcidCompoundCache.get("-")); |
149 | | - gaps.add(gap2 = aminoAcidCompoundCache.get(".")); |
150 | | - gaps.add(gap3 = aminoAcidCompoundCache.get("_")); |
151 | | - equivalentsCache.put(gap1, gaps); |
152 | | - equivalentsCache.put(gap2, gaps); |
153 | | - equivalentsCache.put(gap3, gaps); |
154 | | - // X is never equivalent, even to itself |
155 | | - equivalentsCache.put(aminoAcidCompoundCache.get("X"), new HashSet<AminoAcidCompound>()); |
156 | | - } |
157 | | - return equivalentsCache.get(compound); |
158 | | - } |
159 | | - |
160 | | - // helper method to initialize the equivalent sets for 2 amino acid compounds and their ambiguity compound |
161 | | - private void addAmbiguousEquivalents(String one, String two, String either) { |
162 | | - Set<AminoAcidCompound> equivalents; |
163 | | - AminoAcidCompound cOne, cTwo, cEither; |
164 | | - |
165 | | - equivalents = new HashSet<AminoAcidCompound>(); |
166 | | - equivalents.add(cOne = aminoAcidCompoundCache.get(one)); |
167 | | - equivalents.add(cTwo = aminoAcidCompoundCache.get(two)); |
168 | | - equivalents.add(cEither = aminoAcidCompoundCache.get(either)); |
169 | | - equivalentsCache.put(cEither, equivalents); |
170 | | - |
171 | | - equivalents = new HashSet<AminoAcidCompound>(); |
172 | | - equivalents.add(cOne); |
173 | | - equivalents.add(cEither); |
174 | | - equivalentsCache.put(cOne, equivalents); |
175 | | - |
176 | | - equivalents = new HashSet<AminoAcidCompound>(); |
177 | | - equivalents.add(cTwo); |
178 | | - equivalents.add(cEither); |
179 | | - equivalentsCache.put(cTwo, equivalents); |
180 | | - } |
181 | | - |
182 | | - @Override |
| 134 | + if (equivalentsCache.isEmpty()) { |
| 135 | + // most compounds are equivalent to themselves alone |
| 136 | + for (AminoAcidCompound c : aminoAcidCompoundCache.values()) { |
| 137 | + equivalentsCache.put(c, Collections.singleton(c)); |
| 138 | + } |
| 139 | + // ambiguous Asparagine or Aspartic acid |
| 140 | + addAmbiguousEquivalents("N", "D", "B"); |
| 141 | + // ambiguous Glutamine or Glutamic acid |
| 142 | + addAmbiguousEquivalents("E", "Q", "Z"); |
| 143 | + // ambiguous Leucine or Isoleucine |
| 144 | + addAmbiguousEquivalents("I", "L", "J"); |
| 145 | + // ambiguous gaps |
| 146 | + AminoAcidCompound gap1, gap2, gap3; |
| 147 | + Set<AminoAcidCompound> gaps = new HashSet<AminoAcidCompound>(); |
| 148 | + gaps.add(gap1 = aminoAcidCompoundCache.get("-")); |
| 149 | + gaps.add(gap2 = aminoAcidCompoundCache.get(".")); |
| 150 | + gaps.add(gap3 = aminoAcidCompoundCache.get("_")); |
| 151 | + equivalentsCache.put(gap1, gaps); |
| 152 | + equivalentsCache.put(gap2, gaps); |
| 153 | + equivalentsCache.put(gap3, gaps); |
| 154 | + // X is never equivalent, even to itself |
| 155 | + equivalentsCache.put(aminoAcidCompoundCache.get("X"), new HashSet<AminoAcidCompound>()); |
| 156 | + } |
| 157 | + return equivalentsCache.get(compound); |
| 158 | + } |
| 159 | + |
| 160 | + // helper method to initialize the equivalent sets for 2 amino acid compounds and their ambiguity compound |
| 161 | + private void addAmbiguousEquivalents(String one, String two, String either) { |
| 162 | + Set<AminoAcidCompound> equivalents; |
| 163 | + AminoAcidCompound cOne, cTwo, cEither; |
| 164 | + |
| 165 | + equivalents = new HashSet<AminoAcidCompound>(); |
| 166 | + equivalents.add(cOne = aminoAcidCompoundCache.get(one)); |
| 167 | + equivalents.add(cTwo = aminoAcidCompoundCache.get(two)); |
| 168 | + equivalents.add(cEither = aminoAcidCompoundCache.get(either)); |
| 169 | + equivalentsCache.put(cEither, equivalents); |
| 170 | + |
| 171 | + equivalents = new HashSet<AminoAcidCompound>(); |
| 172 | + equivalents.add(cOne); |
| 173 | + equivalents.add(cEither); |
| 174 | + equivalentsCache.put(cOne, equivalents); |
| 175 | + |
| 176 | + equivalents = new HashSet<AminoAcidCompound>(); |
| 177 | + equivalents.add(cTwo); |
| 178 | + equivalents.add(cEither); |
| 179 | + equivalentsCache.put(cTwo, equivalents); |
| 180 | + } |
| 181 | + |
| 182 | + @Override |
183 | 183 | public boolean hasCompound(AminoAcidCompound compound) { |
184 | | - return aminoAcidCompoundCache.containsValue(compound); |
185 | | - } |
| 184 | + return aminoAcidCompoundCache.containsValue(compound); |
| 185 | + } |
186 | 186 |
|
187 | | - @Override |
| 187 | + @Override |
188 | 188 | public List<AminoAcidCompound> getAllCompounds() { |
189 | | - return new ArrayList<AminoAcidCompound>(aminoAcidCompoundCache.values()); |
190 | | - } |
| 189 | + return new ArrayList<AminoAcidCompound>(aminoAcidCompoundCache.values()); |
| 190 | + } |
191 | 191 |
|
192 | 192 |
|
193 | | - @Override |
| 193 | + @Override |
194 | 194 | public boolean isComplementable() { |
195 | | - return false; |
196 | | - } |
197 | | - |
198 | | - @Override |
199 | | - public boolean isValidSequence(Sequence<AminoAcidCompound> sequence) { |
200 | | - for (AminoAcidCompound c: sequence) { |
201 | | - if (!hasCompound(c)) { |
202 | | - return false; |
203 | | - } |
204 | | - } |
205 | | - return true; |
206 | | - } |
| 195 | + return false; |
| 196 | + } |
| 197 | + |
| 198 | + @Override |
| 199 | + public boolean isValidSequence(Sequence<AminoAcidCompound> sequence) { |
| 200 | + for (AminoAcidCompound c: sequence) { |
| 201 | + if (!hasCompound(c)) { |
| 202 | + return false; |
| 203 | + } |
| 204 | + } |
| 205 | + return true; |
| 206 | + } |
207 | 207 | } |
0 commit comments