Skip to content

Commit 993f65a

Browse files
committed
adding a unit test for checking genomic ranges to CDS mapping tools
1 parent eda3ecd commit 993f65a

File tree

2 files changed

+208
-16
lines changed

2 files changed

+208
-16
lines changed

biojava-genome/src/main/java/org/biojava/nbio/genome/util/ChromosomeMappingTools.java

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ public class ChromosomeMappingTools {
2020
public static final boolean debug = false;
2121
private static final String newline = System.getProperty("line.separator");
2222

23+
public static final String CHROMOSOME = "CHROMOSOME";
24+
public static final String CDS = "CDS";
25+
2326
public static String formatExonStructure(GeneChromosomePosition chromosomePosition ){
2427
if ( chromosomePosition.getOrientation() == '+')
2528
return formatExonStructureForward(chromosomePosition);
@@ -33,10 +36,8 @@ public static String printHTMLExonStructure(GeneChromosomePosition chromosomePos
3336
return printHTMLExonStructureForward(chromosomePosition);
3437

3538
return printHTMLExonStructureReverse(chromosomePosition);
36-
3739
}
3840

39-
4041
private static String formatExonStructureForward(GeneChromosomePosition chromPos) {
4142
StringWriter s = new StringWriter();
4243

@@ -764,8 +765,8 @@ public static int getCDSLengthReverse(List<Integer> exonStarts,
764765
inCoding = false;
765766
codingLength += (end - cdsStart);
766767
if (debug) {
767-
System.out.println(" <- Exon : " + cdsStart + " - " + end + " | " + (end - cdsStart) + " | " + (codingLength-3) + " | " + (codingLength % 3));
768-
System.out.println(" UTR : " + start + " - " + (cdsStart - 1));
768+
System.out.println(" <- Exon : " + (cdsStart+1) + " - " + end + " | " + (end - cdsStart) + " | " + (codingLength-3) + " | " + (codingLength % 3));
769+
System.out.println(" UTR : " + start + " - " + (cdsStart ));
769770
}
770771

771772
} else if (inCoding) {
@@ -862,12 +863,26 @@ public static int getCDSLengthForward(List<Integer> exonStarts, List<Integer> ex
862863
*/
863864
public static List<Range> getCDSExonRanges(GeneChromosomePosition chromPos){
864865
if ( chromPos.getOrientation() == '+')
865-
return getCDSExonRangesForward(chromPos);
866+
return getCDSExonRangesForward(chromPos,CDS);
867+
868+
return getCDSExonRangesReverse(chromPos,CDS);
869+
}
870+
871+
872+
/** Extracts the boundaries of the coding regions in chromosomal coordinates
873+
*
874+
* @param chromPos
875+
* @return
876+
*/
877+
public static List<Range> getChromosomalRangesForCDS(GeneChromosomePosition chromPos){
878+
if ( chromPos.getOrientation() == '+')
879+
return getCDSExonRangesForward(chromPos,CHROMOSOME);
866880

867-
return getCDSExonRangesReverse(chromPos);
881+
return getCDSExonRangesReverse(chromPos,CHROMOSOME);
868882
}
869883

870-
private static List<Range> getCDSExonRangesReverse(GeneChromosomePosition chromPos) {
884+
private static List<Range> getCDSExonRangesReverse(GeneChromosomePosition chromPos,
885+
String responseType) {
871886
List<Integer> exonStarts = chromPos.getExonStarts();
872887
List<Integer> exonEnds = chromPos.getExonEnds();
873888

@@ -928,13 +943,23 @@ private static List<Range> getCDSExonRangesReverse(GeneChromosomePosition chromP
928943
s.append(" UTR :" + format(cdsStart) + " - " + format(start + 1));
929944
s.append(newline);
930945
}
931-
Range r = Range.closed(0,codingLength);
946+
947+
Range r ;
948+
if ( responseType.equals(CDS))
949+
r = Range.closed(0,codingLength);
950+
else
951+
r = Range.closed(tmpstart,cdsEnd);
952+
932953
data.add(r);
933954

934955
} else if (start <= cdsStart && end >= cdsStart) {
935956
inCoding = false;
957+
Range r;
958+
if ( responseType.equals(CDS))
959+
r = Range.closed(codingLength,codingLength+(end-cdsStart));
960+
else
961+
r = Range.closed(cdsStart+1,end);
936962

937-
Range r = Range.closed(codingLength,codingLength+(end-cdsStart));
938963
data.add(r);
939964

940965
codingLength += (end - cdsStart);
@@ -948,8 +973,11 @@ private static List<Range> getCDSExonRangesReverse(GeneChromosomePosition chromP
948973

949974
} else if (inCoding) {
950975
// full exon is coding
951-
952-
Range r = Range.closed(codingLength,codingLength+(end-start));
976+
Range r;
977+
if ( responseType.equals(CDS))
978+
r = Range.closed(codingLength,codingLength+(end-start));
979+
else
980+
r = Range.closed(start,end);
953981
data.add(r);
954982

955983
codingLength += (end - start);
@@ -975,7 +1003,8 @@ private static List<Range> getCDSExonRangesReverse(GeneChromosomePosition chromP
9751003
}
9761004

9771005

978-
private static List<Range> getCDSExonRangesForward(GeneChromosomePosition chromPos) {
1006+
private static List<Range> getCDSExonRangesForward(GeneChromosomePosition chromPos,
1007+
String responseType) {
9791008

9801009
List<Range> data = new ArrayList<Range>();
9811010
List<Integer> exonStarts = chromPos.getExonStarts();
@@ -998,21 +1027,33 @@ private static List<Range> getCDSExonRangesForward(GeneChromosomePosition chromP
9981027
inCoding = true;
9991028
codingLength += (end - cdsStart);
10001029
//
1001-
Range r = Range.closed(0,codingLength);
1030+
1031+
Range r;
1032+
if ( responseType.equals(CDS))
1033+
r = Range.closed(0,codingLength);
1034+
else
1035+
r = Range.closed(cdsStart,end);
10021036
data.add(r);
10031037

10041038
} else if (start <= cdsEnd && end >= cdsEnd) {
10051039
//System.out.println(" <-- CDS end at: " + cdsEnd );
10061040
inCoding = false;
10071041

1008-
Range r = Range.closed(codingLength,codingLength+(cdsEnd-start));
1042+
Range r;
1043+
if ( responseType.equals(CDS))
1044+
r = Range.closed(codingLength,codingLength+(cdsEnd-start));
1045+
else
1046+
r = Range.closed(start,cdsEnd);
10091047
data.add(r);
10101048
codingLength += (cdsEnd - start);
10111049

10121050
} else if (inCoding) {
10131051
// full exon is coding
1014-
1015-
Range r = Range.closed(codingLength,codingLength+(end-start));
1052+
Range r;
1053+
if ( responseType.equals(CDS))
1054+
r = Range.closed(codingLength,codingLength+(end-start));
1055+
else
1056+
r = Range.closed(start,end);
10161057
data.add(r);
10171058
codingLength += (end - start);
10181059

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
package org.biojava.nbio.genome;
2+
3+
import com.google.common.collect.Lists;
4+
import com.google.common.collect.Range;
5+
import junit.framework.TestCase;
6+
import org.biojava.nbio.genome.parsers.genename.GeneChromosomePosition;
7+
import org.biojava.nbio.genome.parsers.genename.GeneChromosomePositionParser;
8+
import org.biojava.nbio.genome.util.ChromosomeMappingTools;
9+
import org.junit.Test;
10+
11+
import java.io.InputStream;
12+
import java.net.URL;
13+
import java.util.List;
14+
import java.util.zip.GZIPInputStream;
15+
16+
/**
17+
* Created by andreas on 7/19/16.
18+
*/
19+
public class TestGenomeMapping extends TestCase{
20+
21+
static final String geneChromosomeFile = "http://cdn.rcsb.org/gene/hg38/geneChromosome38.tsf.gz";
22+
23+
List<GeneChromosomePosition> gcps = null;
24+
25+
@Override
26+
protected void setUp() throws Exception {
27+
super.setUp();
28+
InputStream input = new GZIPInputStream(new URL(geneChromosomeFile).openStream());
29+
gcps = GeneChromosomePositionParser.getChromosomeMappings(input);
30+
31+
32+
}
33+
34+
35+
@Test
36+
public void testAK1() {
37+
String geneName = "AK1";
38+
39+
assertNotNull(gcps);
40+
assertTrue("Problems with downloading refFlat file from UCSC browser ", gcps.size() > 100);
41+
42+
int uniProtLength = 194;
43+
44+
try {
45+
46+
for (GeneChromosomePosition pos : gcps) {
47+
48+
//System.out.println(pos.getGeneName());
49+
if (!pos.getGeneName().equals(geneName))
50+
continue;
51+
52+
/// there are three alternative transcripts for AK1.
53+
// we are just testing one here:
54+
55+
if ( ! pos.getGenebankId().equals("NM_000476"))
56+
continue;
57+
58+
assertTrue(pos.getGeneName().equals(geneName));
59+
assertTrue(pos.getOrientation().equals('-'));
60+
assertTrue(pos.getChromosome().equals("chr9"));
61+
62+
List<Range> cdsranges = ChromosomeMappingTools.getCDSExonRanges(pos);
63+
64+
validateExon(0,0,7, cdsranges );
65+
validateExon(1,7,43, cdsranges );
66+
validateExon(2,43,207, cdsranges );
67+
validateExon(3,207,324, cdsranges );
68+
validateExon(4,324,516, cdsranges );
69+
validateExon(5,516,585, cdsranges );
70+
71+
72+
int cdslength = ChromosomeMappingTools.getCDSLength(pos);
73+
74+
assertTrue("CDS length should be 582, but is " + cdslength, cdslength == (uniProtLength *3));
75+
76+
List<Range> chromranges = ChromosomeMappingTools.getChromosomalRangesForCDS(pos);
77+
78+
// we are reverse strand. reverse the order
79+
chromranges = Lists.reverse(chromranges);
80+
81+
assertTrue(chromranges.size() == 6);
82+
83+
// compare with https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA=CCDS6881
84+
validateExon(0,127868008,127868076, chromranges );
85+
validateExon(1,127868320,127868512, chromranges );
86+
validateExon(2,127871822,127871939, chromranges );
87+
validateExon(3,127872689,127872853, chromranges );
88+
validateExon(4,127873025,127873061, chromranges );
89+
validateExon(5,127874610,127874617, chromranges );
90+
91+
}
92+
} catch (Exception e) {
93+
fail(e.getMessage());
94+
}
95+
}
96+
97+
@Test
98+
public void testHBA(){
99+
100+
String geneName = "HBA1";
101+
assertNotNull(gcps);
102+
103+
assertTrue("Problems with downloading refFlat file from UCSC browser ", gcps.size() > 100);
104+
105+
try {
106+
107+
for ( GeneChromosomePosition pos : gcps){
108+
109+
//System.out.println(pos.getGeneName());
110+
if ( ! pos.getGeneName().equals(geneName))
111+
continue;
112+
113+
assertTrue(pos.getGeneName().equals("HBA1"));
114+
assertTrue(pos.getGenebankId().equals("NM_000558"));
115+
assertTrue(pos.getChromosome().equals("chr16"));
116+
assertTrue(pos.getTranscriptionStart().equals(176650));
117+
assertTrue(pos.getTranscriptionEnd().equals(177522));
118+
assertTrue(pos.getOrientation().equals('+'));
119+
120+
List<Range> cdsranges = ChromosomeMappingTools.getCDSExonRanges(pos);
121+
122+
assertTrue(cdsranges.size() == 3);
123+
124+
validateExon(0,0,95,cdsranges);
125+
validateExon(1,95,300,cdsranges);
126+
validateExon(2,300,429,cdsranges);
127+
128+
129+
List<Range> chromranges = ChromosomeMappingTools.getChromosomalRangesForCDS(pos);
130+
131+
validateExon(0,176716,176811, chromranges );
132+
validateExon(1,176928,177133, chromranges );
133+
validateExon(2,177282,177411, chromranges );
134+
135+
136+
}
137+
} catch (Exception e){
138+
fail(e.getMessage());
139+
}
140+
141+
142+
}
143+
144+
private void validateExon(int exonNr, int start, int stop, List<Range> cdsranges) {
145+
146+
Range exon = cdsranges.get(exonNr);
147+
assertTrue("Exon " + exonNr + " boundary "+ exon.lowerEndpoint() + " does not match " +start , exon.lowerEndpoint().equals(start));
148+
assertTrue("Exon " + exonNr + " boundary " + exon.upperEndpoint() + " does not match " + stop, exon.upperEndpoint().equals(stop));
149+
150+
}
151+
}

0 commit comments

Comments
 (0)