Skip to content

Commit 0473ab6

Browse files
committed
Jronn disorder scores calculation: handling sequences containing the selenocysteine (Sec) amino acid.
Unit test: the user manually converts the stop codon symbols ("*") to the "U" symbol, and Jronn is expected to handle the resulting sequence.
1 parent 31333af commit 0473ab6

File tree

2 files changed

+50
-4
lines changed

2 files changed

+50
-4
lines changed

biojava-protein-disorder/src/main/java/org/biojava/nbio/data/sequence/SequenceUtil.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,20 +63,20 @@ public final class SequenceUtil {
6363
/**
6464
* Valid Amino acids
6565
*/
66-
public static final Pattern AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYV]+",
66+
public static final Pattern AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYVUO]+",
6767
Pattern.CASE_INSENSITIVE);
6868

6969
/**
7070
* Inversion of the AMBIGUOUS_AA pattern: matches any run of characters that are neither amino acid letters nor X
7171
*/
7272
public static final Pattern NON_AA = Pattern.compile(
73-
"[^ARNDCQEGHILKMFPSTWYVX]+", Pattern.CASE_INSENSITIVE);
73+
"[^ARNDCQEGHILKMFPSTWYVXUO]+", Pattern.CASE_INSENSITIVE);
7474

7575
/**
76-
* Same as AA pattern but with two additional letters - XU
76+
* Same as AA pattern but with one additional letter - X
7777
*/
7878
public static final Pattern AMBIGUOUS_AA = Pattern.compile(
79-
"[ARNDCQEGHILKMFPSTWYVXU]+", Pattern.CASE_INSENSITIVE);
79+
"[ARNDCQEGHILKMFPSTWYVXUO]+", Pattern.CASE_INSENSITIVE);
8080

8181
/**
8282
* Nucleotides a, t, g, c, u

biojava-protein-disorder/src/test/java/org/biojava/nbio/ronn/JronnTest.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,16 @@
2020
*/
2121
package org.biojava.nbio.ronn;
2222

23+
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
24+
import org.biojava.nbio.core.sequence.DNASequence;
25+
import org.biojava.nbio.core.sequence.ProteinSequence;
26+
import org.biojava.nbio.core.sequence.RNASequence;
2327
import org.biojava.nbio.data.sequence.FastaSequence;
2428
import org.biojava.nbio.ronn.Jronn.Range;
2529
import org.junit.Test;
2630

2731
import static org.junit.Assert.assertEquals;
32+
import static org.junit.Assert.fail;
2833

2934

3035
public class JronnTest {
@@ -99,4 +104,45 @@ public void close() {
99104
}
100105
}
101106
}
107+
108+
/** Tests the user scenario in which disorder scores are calculated for a sequence containing the selenocysteine (Sec) amino acid.
109+
* The user has to manually convert the stop codon symbols ("*") to the "U" symbol, and Jronn is expected to handle the resulting sequence.
110+
*
111+
* @throws CompoundNotFoundException if the DNA string contains a symbol that is not a recognised nucleotide compound
112+
*/
113+
114+
@Test
115+
public void testStopCodons() throws CompoundNotFoundException {
116+
117+
// gene: DIO2, NM_001007023
118+
String dnaString = "ATGGGCATCCTCAGCGTAGACTTGCTGATCACACTGCAAATTCTGCCAGTTTTTTTCTCCAACTGCCTCT" +
119+
"TCCTGGCTCTCTATGACTCGGTCATTCTGCTCAAGCACGTGGTGCTGCTGTTGAGCCGCTCCAAGTCCAC" +
120+
"TCGCGGAGAGTGGCGGCGCATGCTGACCTCAGAGGGACTGCGCTGCGTCTGGAAGAGCTTCCTCCTCGAT" +
121+
"GCCTACAAACAGCTAAATTGTCCTCCATCAGGTTTTAGCAAAGATGGACACATTTTATGACTAGTATATG" +
122+
"AAGCTTATAAAAGCAGACTACTGGTCTACTCACATTTGGATTTATGGATGGTGAAATTGGGTGAGGATGC" +
123+
"CCCCAATTCCAGTGTGGTGCATGTCTCCAGTACAGAAGGAGGTGACAACAGTGGCAATGGTACCCAGGAG" +
124+
"AAGATAGCTGAGGGAGCCACATGCCACCTTCTTGACTTTGCCAGCCCTGAGCGCCCACTAGTGGTCAACT" +
125+
"TTGGCTCAGCCACTTGACCTCCTTTCACGAGCCAGCTGCCAGCCTTCCGCAAACTGGTGGAAGAGTTCTC" +
126+
"CTCAGTGGCTGACTTCCTGCTGGTCTACATTGATGAGGCTCATCCATCAGATGGCTGGGCGATACCGGGG" +
127+
"GACTCCTCTTTGTCTTTTGAGGTGAAGAAGCACCAGAACCAGGAAGATCGATGTGCAGCAGCCCAGCAGC" +
128+
"TTCTGGAGCGTTTCTCCTTGCCGCCCCAGTGCCGAGTTGTGGCTGACCGCATGGACAATAACGCCAACAT" +
129+
"AGCTTACGGGGTAGCCTTTGAACGTGTGTGCATTGTGCAGAGACAGAAAATTGCTTATCTGGGAGGAAAG" +
130+
"GGCCCCTTCTCCTACAACCTTCAAGAAGTCCGGCATTGGCTGGAGAAGAATTTCAGCAAGAGATGAAAGA" +
131+
"AAACTAGATTAGCTGGTTAA";
132+
133+
DNASequence dnaSequence = new DNASequence(dnaString);
134+
RNASequence mRNA = dnaSequence.getRNASequence();
135+
ProteinSequence protein = mRNA.getProteinSequence();
136+
137+
String proteinString = protein.getSequenceAsString();
138+
// replace the stop symbol * (codon TGA) with u, the one-letter code for selenocysteine (patterns are case-insensitive)
139+
String proteinStringU = proteinString.replaceAll("\\*", "u");
140+
FastaSequence fsequence = new FastaSequence("", proteinStringU);
141+
try {
142+
float[] rawProbabilityScores = Jronn.getDisorderScores(fsequence);
143+
}
144+
catch (Exception e) {
145+
fail("Disorder scores calculation doesn't work");
146+
}
147+
}
102148
}

0 commit comments

Comments
 (0)