4141import java .io .InputStream ;
4242import java .net .URL ;
4343import java .util .LinkedHashMap ;
44+ import java .util .List ;
45+ import org .biojava .nbio .core .sequence .features .Qualifier ;
4446
4547/**
4648 * The representation of a ProteinSequence
5052 */
5153public class ProteinSequence extends AbstractSequence <AminoAcidCompound > {
5254
53- private final static Logger logger = LoggerFactory .getLogger (ProteinSequence .class );
54-
55- /*
56- private ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>> features
57- = new ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>();
58- private LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>> groupedFeatures
59- = new LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>>();
60- */
55+ private final static Logger logger = LoggerFactory .getLogger (ProteinSequence .class );
6156
57+ /*
58+ private ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>> features
59+ = new ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>();
60+ private LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>> groupedFeatures
61+ = new LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>>>();
62+ */
/**
 * Create a protein from a string, using the compound set returned by
 * {@code AminoAcidCompoundSet.getAminoAcidCompoundSet()}.
 *
 * @param seqString the protein sequence as a string
 * @throws CompoundNotFoundException propagated from the
 *         {@code (String, CompoundSet)} constructor this one delegates to
 */
public ProteinSequence(String seqString) throws CompoundNotFoundException {
    this(seqString, AminoAcidCompoundSet.getAminoAcidCompoundSet());
}
7172
@@ -74,7 +75,7 @@ public ProteinSequence(String seqString) throws CompoundNotFoundException {
7475 *
7576 * @param seqString
7677 * @param compoundSet
77- * @throws CompoundNotFoundException
78+ * @throws CompoundNotFoundException
7879 */
7980 public ProteinSequence (String seqString , CompoundSet <AminoAcidCompound > compoundSet ) throws CompoundNotFoundException {
8081 super (seqString , compoundSet );
@@ -97,9 +98,35 @@ public ProteinSequence(ProxySequenceReader<AminoAcidCompound> proxyLoader) {
9798 * file or via a Uniprot Proxy reader via Uniprot ID
9899 *
99100 * @param proxyLoader
101+ * @param compoundSet
100102 */
101103 public ProteinSequence (ProxySequenceReader <AminoAcidCompound > proxyLoader , CompoundSet <AminoAcidCompound > compoundSet ) {
102104 super (proxyLoader , compoundSet );
105+
106+ // do protein-specific tasks
107+ // add source if found
108+ List <FeatureInterface <AbstractSequence <AminoAcidCompound >, AminoAcidCompound >> CDSFeatures = getFeaturesByType ("CDS" );
109+
110+ // cases if a protein has more than 1 parent are not supported yet
111+ if (CDSFeatures .size () == 1 ) {
112+ Qualifier codedBy = CDSFeatures .get (0 ).getQualifiers ().get ("coded_by" );
113+
114+ if (codedBy != null ) {
115+ String codedBySeq = codedBy .getValue ();
116+
117+ InsdcParser parser = new InsdcParser (DataSource .GENBANK );
118+ Location location = parser .parse (codedBySeq );
119+
120+ try {
121+ DNASequence dnaSeq = new DNASequence (getSequence (location ), DNACompoundSet .getDNACompoundSet ());
122+ setParentDNASequence (dnaSeq , location .getStart ().getPosition (), location .getEnd ().getPosition ());
123+ } catch (CompoundNotFoundException e ) {
124+ // TODO is there another solution to handle this exception?
125+ logger .error ("Could not add 'coded_by' parent DNA location feature, unrecognised compounds found in DNA sequence: {}" , e .getMessage ());
126+ }
127+ }
128+ }
129+
103130 }
104131
105132 /**
@@ -125,39 +152,12 @@ public void setParentDNASequence(AbstractSequence<NucleotideCompound> parentDNAS
125152 setBioEnd (end );
126153 }
127154
128- /**
129- * Add feature.
130- * <p>
131- * If feature is type 'coded_by' than resolves parent DNA sequence.
132- * </p>
133- * @param feature
134- */
135- @ Override
136- public void addFeature (FeatureInterface <AbstractSequence <AminoAcidCompound >, AminoAcidCompound > feature ) {
137- super .addFeature (feature );
138-
139- // if feature is called 'coded_by' than add parent DNA location
140- if (feature .getType ().equals ("coded_by" )) {
141- InsdcParser parser = new InsdcParser (DataSource .GENBANK );
142-
143- Location location = parser .parse (feature .getSource ());
144- // convert location into DNASequence
145- try {
146- DNASequence dnaSeq = new DNASequence (getSequence (location ), DNACompoundSet .getDNACompoundSet ());
147- setParentDNASequence (dnaSeq , location .getStart ().getPosition (), location .getEnd ().getPosition ());
148- } catch (CompoundNotFoundException e ) {
149- // TODO is there another solution to handle this exception?
150- logger .error ("Could not add 'coded_by' parent DNA location feature, unrecognised compounds found in DNA sequence: {}" ,e .getMessage ());
151- }
152- }
153- }
154-
155155 private DNASequence getRawParentSequence (String accessId ) throws IOException {
156156 String seqUrlTemplate = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=%s&rettype=fasta&retmode=text" ;
157157 URL url = new URL (String .format (seqUrlTemplate , accessId ));
158-
159- logger .info ("Getting parent DNA sequence from URL: {}" , url .toString ());
160-
158+
159+ logger .trace ("Getting parent DNA sequence from URL: {}" , url .toString ());
160+
161161 InputStream is = url .openConnection ().getInputStream ();
162162
163163 FastaReader <DNASequence , NucleotideCompound > parentReader
@@ -176,14 +176,14 @@ private DNASequence getRawParentSequence(String accessId) throws IOException {
176176 }
177177
178178 private String getSequence (Location cdna ) {
179- DNASequence rawParent = null ;
179+ DNASequence rawParent ;
180180 if (!cdna .isComplex ()) {
181181 try {
182182 rawParent = getRawParentSequence (cdna .getAccession ().getID ());
183183 return cdna .getSubSequence (rawParent ).getSequenceAsString ();
184184 } catch (IOException e ) {
185185 // return null
186- logger .error ("Caught IOException when getting DNA sequence for id {}. Error: {}" , cdna .getAccession ().getID (), e .getMessage ());
186+ logger .error ("Caught IOException when getting DNA sequence for id {}. Error: {}" , cdna .getAccession ().getID (), e .getMessage ());
187187 return null ;
188188 }
189189 } else {
0 commit comments