Skip to content

Commit 8930d61

Browse files
committed
adding Fastq.convertTo(FastqVariant) and supporting methods
1 parent 76ae666 commit 8930d61

File tree

5 files changed

+373
-2
lines changed

5 files changed

+373
-2
lines changed

biojava-sequencing/src/main/java/org/biojava/nbio/sequencing/io/fastq/Fastq.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,20 @@ public FastqVariant getVariant()
121121
return variant;
122122
}
123123

124+
/**
125+
* Create and return a new FASTQ formatted sequence from this converted to the
126+
* specified FASTQ sequence format variant.
127+
*
128+
* @since 4.2
129+
* @param variant FASTQ sequence format variant, must not be null
130+
* @return a new FASTQ formatted sequence from this converted to the
131+
* specified FASTQ sequence format variant
132+
*/
133+
public Fastq convertTo(final FastqVariant variant)
134+
{
135+
return FastqTools.convert(this, variant);
136+
}
137+
124138
/**
125139
* Create and return a new FastqBuilder.
126140
* The FastqBuilder will not be null.

biojava-sequencing/src/main/java/org/biojava/nbio/sequencing/io/fastq/FastqTools.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,67 @@ public static double[] errorProbabilities(final Fastq fastq, final double[] erro
268268
return errorProbabilities;
269269
}
270270

271+
/**
272+
* Convert the specified FASTQ formatted sequence to the
273+
* specified FASTQ sequence format variant.
274+
*
275+
* @since 4.2
276+
* @param fastq FASTQ formatted sequence, must not be null
277+
* @param variant FASTQ sequence format variant, must not be null
278+
* @return the specified FASTQ formatted sequence converted to the
279+
* specified FASTQ sequence format variant
280+
*/
281+
public static Fastq convert(final Fastq fastq, final FastqVariant variant)
282+
{
283+
if (fastq == null)
284+
{
285+
throw new IllegalArgumentException("fastq must not be null");
286+
}
287+
if (variant == null)
288+
{
289+
throw new IllegalArgumentException("variant must not be null");
290+
}
291+
if (fastq.getVariant().equals(variant))
292+
{
293+
return fastq;
294+
}
295+
return new Fastq(fastq.getDescription(), fastq.getSequence(), convertQualities(fastq, variant), variant);
296+
}
297+
298+
/**
299+
* Convert the qualities in the specified FASTQ formatted sequence to the
300+
* specified FASTQ sequence format variant.
301+
*
302+
* @since 4.2
303+
* @param fastq FASTQ formatted sequence, must not be null
304+
* @param variant FASTQ sequence format variant, must not be null
305+
* @return the qualities in the specified FASTQ formatted sequence converted to the
306+
* specified FASTQ sequence format variant
307+
*/
308+
static String convertQualities(final Fastq fastq, final FastqVariant variant)
309+
{
310+
if (fastq == null)
311+
{
312+
throw new IllegalArgumentException("fastq must not be null");
313+
}
314+
if (variant == null)
315+
{
316+
throw new IllegalArgumentException("variant must not be null");
317+
}
318+
if (fastq.getVariant().equals(variant))
319+
{
320+
return fastq.getQuality();
321+
}
322+
int size = fastq.getQuality().length();
323+
double[] errorProbabilities = errorProbabilities(fastq, new double[size]);
324+
StringBuilder sb = new StringBuilder(size);
325+
for (int i = 0; i < size; i++)
326+
{
327+
sb.append(variant.quality(variant.qualityScore(errorProbabilities[i])));
328+
}
329+
return sb.toString();
330+
}
331+
271332
/**
272333
* Return the specified iterable as a list.
273334
*

biojava-sequencing/src/main/java/org/biojava/nbio/sequencing/io/fastq/FastqVariant.java

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@ public int qualityScore(final char c)
5151
return ((int) c) - 33;
5252
}
5353

54+
@Override
55+
public int qualityScore(final double errorProbability)
56+
{
57+
// eq. 2
58+
int phredQ = constrain(-10.0d * Math.log10(errorProbability));
59+
return phredQ;
60+
}
61+
5462
@Override
5563
public char quality(final int qualityScore)
5664
{
@@ -93,6 +101,17 @@ public int qualityScore(final char c)
93101
return ((int) c) - 64;
94102
}
95103

104+
@Override
105+
public int qualityScore(final double errorProbability)
106+
{
107+
// eq. 2
108+
double phredQ = -10.0d * Math.log10(errorProbability);
109+
// eq. 4
110+
int solexaQ = constrain(10.0d * Math.log10(Math.pow(10.0d, (phredQ/10.0d)) - 1.0d));
111+
112+
return solexaQ;
113+
}
114+
96115
@Override
97116
public char quality(final int qualityScore)
98117
{
@@ -111,7 +130,7 @@ public char quality(final int qualityScore)
111130
public double errorProbability(final int qualityScore)
112131
{
113132
double q = Math.pow(10.0d, ((double) qualityScore) / -10.0d);
114-
return q / (1 + q);
133+
return q / (1.0d + q);
115134
}
116135
},
117136

@@ -136,6 +155,14 @@ public int qualityScore(final char c)
136155
return ((int) c) - 64;
137156
}
138157

158+
@Override
159+
public int qualityScore(final double errorProbability)
160+
{
161+
// eq. 2
162+
int phredQ = constrain(-10.0d * Math.log10(errorProbability));
163+
return phredQ;
164+
}
165+
139166
@Override
140167
public char quality(final int qualityScore)
141168
{
@@ -252,6 +279,15 @@ public boolean isIllumina()
252279
*/
253280
public abstract int qualityScore(char c);
254281

282+
/**
283+
* Convert the specified error probability to a quality score.
284+
*
285+
* @since 4.2
286+
* @param errorProbability error probability
287+
* @return the specified error probability converted to a quality score
288+
*/
289+
public abstract int qualityScore(double errorProbability);
290+
255291
/**
256292
* Convert the specified quality score to a quality in ASCII format.
257293
*
@@ -291,6 +327,20 @@ public String lowercaseName()
291327
return name().toLowerCase().replace('_', '-');
292328
}
293329

330+
/**
331+
* Constrain the specified quality score in double precision to the minimum and maximum quality
332+
* scores in int precision.
333+
*
334+
* @since 4.2
335+
* @param qualityScore quality score in double precision
336+
* @return the specified quality score in double precision constrained to the minimum and maximum quality
337+
* scores in int precision
338+
*/
339+
protected int constrain(final double qualityScore)
340+
{
341+
// ick.
342+
return Math.min(maximumQualityScore(), Math.max(minimumQualityScore(), Math.round((float) qualityScore)));
343+
}
294344

295345
/**
296346
* Return the FASTQ sequence format variant with the specified name, if any. The name may
@@ -305,4 +355,4 @@ public static FastqVariant parseFastqVariant(final String name)
305355
{
306356
return FASTQ_VARIANTS.get(name);
307357
}
308-
}
358+
}
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/*
2+
* BioJava development code
3+
*
4+
* This code may be freely distributed and modified under the
5+
* terms of the GNU Lesser General Public Licence. This should
6+
* be distributed with the code. If you do not have a copy,
7+
* see:
8+
*
9+
* http://www.gnu.org/copyleft/lesser.html
10+
*
11+
* Copyright for this code is held jointly by the individual
12+
* authors. These should be listed in @author doc comments.
13+
*
14+
* For more information on the BioJava project and its aims,
15+
* or to join the biojava-l mailing list, visit the home page
16+
* at:
17+
*
18+
* http://www.biojava.org/
19+
*
20+
*/
21+
package org.biojava.nbio.sequencing.io.fastq;
22+
23+
import java.io.File;
24+
import java.io.FileWriter;
25+
import java.io.IOException;
26+
27+
import java.util.List;
28+
import java.util.Map;
29+
30+
import junit.framework.TestCase;
31+
32+
import com.google.common.collect.Lists;
33+
import com.google.common.collect.Maps;
34+
35+
/**
36+
* Round trip conversion functional tests.
37+
*/
38+
public final class ConvertTest
39+
extends TestCase
40+
{
41+
42+
public void testConvert() throws Exception
43+
{
44+
Map<FastqVariant, FastqReader> readers = Maps.newHashMap();
45+
readers.put(FastqVariant.FASTQ_SANGER, new SangerFastqReader());
46+
readers.put(FastqVariant.FASTQ_SOLEXA, new SolexaFastqReader());
47+
readers.put(FastqVariant.FASTQ_ILLUMINA, new IlluminaFastqReader());
48+
49+
Map<FastqVariant, FastqWriter> writers = Maps.newHashMap();
50+
writers.put(FastqVariant.FASTQ_SANGER, new SangerFastqWriter());
51+
writers.put(FastqVariant.FASTQ_SOLEXA, new SolexaFastqWriter());
52+
writers.put(FastqVariant.FASTQ_ILLUMINA, new IlluminaFastqWriter());
53+
54+
Map<FastqVariant, String> inputFileNames = Maps.newHashMap();
55+
inputFileNames.put(FastqVariant.FASTQ_SANGER, "sanger_full_range_as_sanger.fastq");
56+
inputFileNames.put(FastqVariant.FASTQ_SOLEXA, "solexa_full_range_as_solexa.fastq");
57+
inputFileNames.put(FastqVariant.FASTQ_ILLUMINA, "illumina_full_range_as_illumina.fastq");
58+
59+
Map<FastqVariantPair, String> expectedFileNames = Maps.newHashMap();
60+
expectedFileNames.put(new FastqVariantPair(FastqVariant.FASTQ_SANGER, FastqVariant.FASTQ_SANGER), "sanger_full_range_as_sanger.fastq");
61+
expectedFileNames.put(new FastqVariantPair(FastqVariant.FASTQ_SANGER, FastqVariant.FASTQ_SOLEXA), "sanger_full_range_as_solexa.fastq");
62+
expectedFileNames.put(new FastqVariantPair(FastqVariant.FASTQ_SANGER, FastqVariant.FASTQ_ILLUMINA), "sanger_full_range_as_illumina.fastq");
63+
expectedFileNames.put(new FastqVariantPair(FastqVariant.FASTQ_SOLEXA, FastqVariant.FASTQ_SANGER), "solexa_full_range_as_sanger.fastq");
64+
expectedFileNames.put(new FastqVariantPair(FastqVariant.FASTQ_SOLEXA, FastqVariant.FASTQ_SOLEXA), "solexa_full_range_as_solexa.fastq");
65+
expectedFileNames.put(new FastqVariantPair(FastqVariant.FASTQ_SOLEXA, FastqVariant.FASTQ_ILLUMINA), "solexa_full_range_as_illumina.fastq");
66+
expectedFileNames.put(new FastqVariantPair(FastqVariant.FASTQ_ILLUMINA, FastqVariant.FASTQ_SANGER), "illumina_full_range_as_sanger.fastq");
67+
expectedFileNames.put(new FastqVariantPair(FastqVariant.FASTQ_ILLUMINA, FastqVariant.FASTQ_SOLEXA), "illumina_full_range_as_solexa.fastq");
68+
expectedFileNames.put(new FastqVariantPair(FastqVariant.FASTQ_ILLUMINA, FastqVariant.FASTQ_ILLUMINA), "illumina_full_range_as_illumina.fastq");
69+
70+
for (FastqVariant variant1 : FastqVariant.values())
71+
{
72+
FastqReader reader = readers.get(variant1);
73+
String inputFileName = inputFileNames.get(variant1);
74+
for (FastqVariant variant2 : FastqVariant.values())
75+
{
76+
FastqWriter writer = writers.get(variant2);
77+
String expectedFileName = expectedFileNames.get(new FastqVariantPair(variant1, variant2));
78+
79+
File tmp = File.createTempFile("convertTest", "fastq");
80+
FileWriter fileWriter = new FileWriter(tmp);
81+
82+
for (Fastq fastq : reader.read(getClass().getResource(inputFileName))) {
83+
writer.append(fileWriter, fastq.convertTo(variant2));
84+
}
85+
86+
try
87+
{
88+
fileWriter.close();
89+
}
90+
catch (Exception e)
91+
{
92+
// ignore
93+
}
94+
95+
FastqReader resultReader = readers.get(variant2);
96+
List<Fastq> observed = Lists.newArrayList(resultReader.read(tmp));
97+
List<Fastq> expected = Lists.newArrayList(resultReader.read(getClass().getResource(expectedFileName)));
98+
99+
assertEquals(expected.size(), observed.size());
100+
for (int i = 0; i < expected.size(); i++)
101+
{
102+
assertEquals(expected.get(i).getDescription(), observed.get(i).getDescription());
103+
assertEquals(expected.get(i).getSequence(), observed.get(i).getSequence());
104+
assertEquals(expected.get(i).getQuality(), observed.get(i).getQuality());
105+
assertEquals(expected.get(i).getVariant(), observed.get(i).getVariant());
106+
}
107+
}
108+
}
109+
}
110+
111+
static final class FastqVariantPair
112+
{
113+
final FastqVariant variant1;
114+
final FastqVariant variant2;
115+
116+
FastqVariantPair(final FastqVariant variant1, final FastqVariant variant2)
117+
{
118+
this.variant1 = variant1;
119+
this.variant2 = variant2;
120+
}
121+
122+
@Override
123+
public int hashCode()
124+
{
125+
int result = 47;
126+
result = 31 * result + variant1.hashCode();
127+
result = 31 * result + variant2.hashCode();
128+
return result;
129+
}
130+
131+
@Override
132+
public boolean equals(final Object o)
133+
{
134+
if (o == this)
135+
{
136+
return true;
137+
}
138+
if (!(o instanceof FastqVariantPair))
139+
{
140+
return false;
141+
}
142+
FastqVariantPair pair = (FastqVariantPair) o;
143+
return variant1.equals(pair.variant1) && variant2.equals(pair.variant2);
144+
}
145+
}
146+
}

0 commit comments

Comments
 (0)