Skip to content

Commit 9cf551a

Browse files
committed
Merge branch 'patch' into minor
2 parents 9bd7bc3 + d4d3d5b commit 9cf551a

File tree

16 files changed

+700
-393
lines changed

16 files changed

+700
-393
lines changed

biojava-core/src/main/java/org/biojava/nbio/core/sequence/ProteinSequence.java

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
import java.io.InputStream;
4242
import java.net.URL;
4343
import java.util.LinkedHashMap;
44+
import java.util.List;
45+
import org.biojava.nbio.core.sequence.features.Qualifier;
4446

4547
/**
4648
* The representation of a ProteinSequence
@@ -50,22 +52,21 @@
5052
*/
5153
public class ProteinSequence extends AbstractSequence<AminoAcidCompound> {
5254

53-
private final static Logger logger = LoggerFactory.getLogger(ProteinSequence.class);
54-
55-
/*
56-
private ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>> features
57-
= new ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>();
58-
private LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>> groupedFeatures
59-
= new LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>>();
60-
*/
55+
private final static Logger logger = LoggerFactory.getLogger(ProteinSequence.class);
6156

57+
/*
58+
private ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>> features
59+
= new ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>();
60+
private LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>> groupedFeatures
61+
= new LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>>();
62+
*/
6263
/**
6364
* Create a protein from a string
6465
*
6566
* @param seqString
66-
* @throws CompoundNotFoundException
67+
* @throws CompoundNotFoundException
6768
*/
68-
public ProteinSequence(String seqString) throws CompoundNotFoundException {
69+
public ProteinSequence(String seqString) throws CompoundNotFoundException {
6970
this(seqString, AminoAcidCompoundSet.getAminoAcidCompoundSet());
7071
}
7172

@@ -74,7 +75,7 @@ public ProteinSequence(String seqString) throws CompoundNotFoundException {
7475
*
7576
* @param seqString
7677
* @param compoundSet
77-
* @throws CompoundNotFoundException
78+
* @throws CompoundNotFoundException
7879
*/
7980
public ProteinSequence(String seqString, CompoundSet<AminoAcidCompound> compoundSet) throws CompoundNotFoundException {
8081
super(seqString, compoundSet);
@@ -97,9 +98,35 @@ public ProteinSequence(ProxySequenceReader<AminoAcidCompound> proxyLoader) {
9798
* file or via a Uniprot Proxy reader via Uniprot ID
9899
*
99100
* @param proxyLoader
101+
* @param compoundSet
100102
*/
101103
public ProteinSequence(ProxySequenceReader<AminoAcidCompound> proxyLoader, CompoundSet<AminoAcidCompound> compoundSet) {
102104
super(proxyLoader, compoundSet);
105+
106+
// do protein-specific tasks
107+
// add source if found
108+
List<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>> CDSFeatures = getFeaturesByType("CDS");
109+
110+
// cases where a protein has more than one parent are not supported yet
111+
if (CDSFeatures.size() == 1) {
112+
Qualifier codedBy = CDSFeatures.get(0).getQualifiers().get("coded_by");
113+
114+
if (codedBy != null) {
115+
String codedBySeq = codedBy.getValue();
116+
117+
InsdcParser parser = new InsdcParser(DataSource.GENBANK);
118+
Location location = parser.parse(codedBySeq);
119+
120+
try {
121+
DNASequence dnaSeq = new DNASequence(getSequence(location), DNACompoundSet.getDNACompoundSet());
122+
setParentDNASequence(dnaSeq, location.getStart().getPosition(), location.getEnd().getPosition());
123+
} catch (CompoundNotFoundException e) {
124+
// TODO is there another solution to handle this exception?
125+
logger.error("Could not add 'coded_by' parent DNA location feature, unrecognised compounds found in DNA sequence: {}", e.getMessage());
126+
}
127+
}
128+
}
129+
103130
}
104131

105132
/**
@@ -125,39 +152,12 @@ public void setParentDNASequence(AbstractSequence<NucleotideCompound> parentDNAS
125152
setBioEnd(end);
126153
}
127154

128-
/**
129-
* Add feature.
130-
* <p>
131-
* If feature is type 'coded_by' than resolves parent DNA sequence.
132-
* </p>
133-
* @param feature
134-
*/
135-
@Override
136-
public void addFeature(FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound> feature) {
137-
super.addFeature(feature);
138-
139-
// if feature is called 'coded_by' than add parent DNA location
140-
if (feature.getType().equals("coded_by")) {
141-
InsdcParser parser = new InsdcParser(DataSource.GENBANK);
142-
143-
Location location = parser.parse(feature.getSource());
144-
// convert location into DNASequence
145-
try {
146-
DNASequence dnaSeq = new DNASequence(getSequence(location), DNACompoundSet.getDNACompoundSet());
147-
setParentDNASequence(dnaSeq, location.getStart().getPosition(), location.getEnd().getPosition());
148-
} catch (CompoundNotFoundException e) {
149-
// TODO is there another solution to handle this exception?
150-
logger.error("Could not add 'coded_by' parent DNA location feature, unrecognised compounds found in DNA sequence: {}",e.getMessage());
151-
}
152-
}
153-
}
154-
155155
private DNASequence getRawParentSequence(String accessId) throws IOException {
156156
String seqUrlTemplate = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=%s&rettype=fasta&retmode=text";
157157
URL url = new URL(String.format(seqUrlTemplate, accessId));
158-
159-
logger.info("Getting parent DNA sequence from URL: {}", url.toString());
160-
158+
159+
logger.trace("Getting parent DNA sequence from URL: {}", url.toString());
160+
161161
InputStream is = url.openConnection().getInputStream();
162162

163163
FastaReader<DNASequence, NucleotideCompound> parentReader
@@ -176,14 +176,14 @@ private DNASequence getRawParentSequence(String accessId) throws IOException {
176176
}
177177

178178
private String getSequence(Location cdna) {
179-
DNASequence rawParent = null;
179+
DNASequence rawParent;
180180
if (!cdna.isComplex()) {
181181
try {
182182
rawParent = getRawParentSequence(cdna.getAccession().getID());
183183
return cdna.getSubSequence(rawParent).getSequenceAsString();
184184
} catch (IOException e) {
185185
// return null
186-
logger.error("Caught IOException when getting DNA sequence for id {}. Error: {}", cdna.getAccession().getID(), e.getMessage());
186+
logger.error("Caught IOException when getting DNA sequence for id {}. Error: {}", cdna.getAccession().getID(), e.getMessage());
187187
return null;
188188
}
189189
} else {

biojava-core/src/main/java/org/biojava/nbio/core/sequence/features/Qualifier.java

Lines changed: 80 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -18,78 +18,83 @@
1818
* http://www.biojava.org/
1919
*
2020
*/
21-
/**
22-
*
23-
*/
24-
package org.biojava.nbio.core.sequence.features;
25-
26-
/**
27-
* @author mckeee1
28-
*
29-
*/
30-
public class Qualifier {
31-
String value;
32-
String name;
33-
34-
boolean needsQuotes;
35-
36-
/**
37-
*
38-
*/
39-
public Qualifier(String name, String value) {
40-
// TODO Auto-generated constructor stub
41-
this.name=name;
42-
this.value=value;
43-
needsQuotes = false;
44-
}
45-
46-
/**
47-
*
48-
*/
49-
public Qualifier(String name, String value, boolean needsQuotes) {
50-
// TODO Auto-generated constructor stub
51-
this.name=name;
52-
this.value=value;
53-
this.needsQuotes = needsQuotes;
54-
}
55-
56-
/**
57-
* @return the name
58-
*/
59-
public String getName() {
60-
return name;
61-
}
62-
63-
/**
64-
* @return the value
65-
*/
66-
public String getValue() {
67-
return value;
68-
}
69-
70-
/**
71-
* @return the needsQuotes
72-
*/
73-
public boolean needsQuotes() {
74-
return needsQuotes;
75-
}
76-
/**
77-
* @param name the name to set
78-
*/
79-
public void setName(String name) {
80-
this.name = name;
81-
}
82-
/**
83-
* @param needsQuotes the needsQuotes to set
84-
*/
85-
public void setNeedsQuotes(boolean needsQuotes) {
86-
this.needsQuotes = needsQuotes;
87-
}
88-
89-
/**
90-
* @param value the value to set
91-
*/
92-
public void setValue(String value) {
93-
this.value = value;
94-
}
95-
}
21+
/**
22+
*
23+
*/
24+
package org.biojava.nbio.core.sequence.features;
25+
26+
/**
27+
* @author mckeee1
28+
*
29+
*/
30+
public class Qualifier {
31+
String value;
32+
String name;
33+
34+
boolean needsQuotes;
35+
36+
/**
37+
*
38+
*/
39+
public Qualifier(String name, String value) {
40+
// TODO Auto-generated constructor stub
41+
this.name=name;
42+
this.value=value;
43+
needsQuotes = false;
44+
}
45+
46+
/**
47+
*
48+
*/
49+
public Qualifier(String name, String value, boolean needsQuotes) {
50+
// TODO Auto-generated constructor stub
51+
this.name=name;
52+
this.value=value;
53+
this.needsQuotes = needsQuotes;
54+
}
55+
56+
/**
57+
* @return the name
58+
*/
59+
public String getName() {
60+
return name;
61+
}
62+
63+
/**
64+
* @return the value
65+
*/
66+
public String getValue() {
67+
return value;
68+
}
69+
70+
/**
71+
* @return the needsQuotes
72+
*/
73+
public boolean needsQuotes() {
74+
return needsQuotes;
75+
}
76+
/**
77+
* @param name the name to set
78+
*/
79+
public void setName(String name) {
80+
this.name = name;
81+
}
82+
/**
83+
* @param needsQuotes the needsQuotes to set
84+
*/
85+
public void setNeedsQuotes(boolean needsQuotes) {
86+
this.needsQuotes = needsQuotes;
87+
}
88+
89+
/**
90+
* @param value the value to set
91+
*/
92+
public void setValue(String value) {
93+
this.value = value;
94+
}
95+
96+
@Override
97+
public String toString() {
98+
return "Qualifier[ name='" + name +"' value='"+ value + "' ]";
99+
}
100+
}

biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ public GenbankProxySequenceReader(
8888
header = genbankParser.getHeader();
8989
features = genbankParser.getFeatures();
9090

91-
if (compoundSet.equals(AminoAcidCompoundSet.class)) {
91+
if (compoundSet.getClass().equals(AminoAcidCompoundSet.class)) {
9292
if (!genbankParser.getCompoundType().equals(compoundSet)) {
9393
logger.error("Declared compount type {} does not mach the real: {}", genbankParser.getCompoundType().toString(), compoundSet.toString());
9494
throw new IOException("Wrong declared compound type for: " + accessionID);
@@ -103,7 +103,7 @@ private BufferedInputStream getBufferedInputStream(String accessionID, String db
103103
if (genbankDirectoryCache != null && genbankDirectoryCache.length() > 0) {
104104
File f = new File(genbankDirectoryCache + File.separatorChar + accessionID + ".gb");
105105
if (f.exists()) {
106-
logger.info("Reading: {}", f.toString());
106+
logger.debug("Reading: {}", f.toString());
107107
inStream = new BufferedInputStream(new FileInputStream(f));
108108
} else {
109109
InputStream in = getEutilsInputStream(accessionID, db);
@@ -135,7 +135,7 @@ private void copyInputStreamToFile(InputStream in, File f) throws IOException, I
135135

136136
private InputStream getEutilsInputStream(String accessionID, String db) throws IOException {
137137
String genbankURL = eutilBaseURL + "efetch.fcgi?db=" + db + "&id=" + accessionID + "&rettype=gb&retmode=text";
138-
logger.info("Loading: {}", genbankURL);
138+
logger.trace("Loading: {}", genbankURL);
139139
URL genbank = new URL(genbankURL);
140140
URLConnection genbankConnection = genbank.openConnection();
141141
return genbankConnection.getInputStream();

biojava-core/src/main/java/org/biojava/nbio/core/sequence/location/InsdcParser.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,11 @@ public class InsdcParser <S extends AbstractSequence<C>, C extends Compound>{
5555
* Parse a location. If group(1) is null then the feature is on the positive
5656
* strand, group(2) start position, group(3) end position.
5757
*/
58-
protected static final Pattern singleLocationPattern = Pattern.compile("(?:[A-Z]([A-Za-z\\.0-9_]*?):)?(<?)(\\d+)(\\.{2}|\\^)?(>?)(\\d+)?(>?)?");
58+
// Why was the first character of the location ignored?
59+
//protected static final Pattern singleLocationPattern = Pattern.compile("(?:[A-Z]([A-Za-z\\.0-9_]*?):)?(<?)(\\d+)(\\.{2}|\\^)?(>?)(\\d+)?(>?)?");
60+
61+
// fixed issue #254
62+
protected static final Pattern singleLocationPattern = Pattern.compile("(?:([A-Za-z\\.0-9_]*?):)?(<?)(\\d+)(\\.{2}|\\^)?(>?)(\\d+)?(>?)?");
5963
/**
6064
* Decodes a split pattern. Split patterns are a composition of multiple
6165
* locationsString qualified by actions: join(location,location, ...

0 commit comments

Comments
 (0)