Skip to content

Commit 6c6cdaa

Browse files
committed
add ability to pull gene and protein aliases from uniprot
1 parent b2e9704 commit 6c6cdaa

3 files changed

Lines changed: 591 additions & 573 deletions

File tree

biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenericInsdcHeaderFormat.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.biojava.nbio.core.sequence.Strand;
2727
import org.biojava.nbio.core.sequence.features.FeatureInterface;
2828
import org.biojava.nbio.core.sequence.features.Qualifier;
29+
import org.biojava.nbio.core.sequence.location.template.AbstractLocation;
2930
import org.biojava.nbio.core.sequence.location.template.Point;
3031
import org.biojava.nbio.core.sequence.template.AbstractSequence;
3132
import org.biojava.nbio.core.sequence.template.Compound;
@@ -220,7 +221,7 @@ private String _insdc_feature_location_string(FeatureInterface<AbstractSequence<
220221

221222
private String _insdc_location_string_ignoring_strand_and_subfeatures(
222223
//SequenceLocation<AbstractSequence<C>, C> sequenceLocation,
223-
org.biojava.nbio.core.sequence.location.template.AbstractLocation sequenceLocation,
224+
AbstractLocation sequenceLocation,
224225
int record_length) {
225226
/*
226227
if location.ref:

biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/UniprotProxySequenceReader.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,10 +337,21 @@ public ArrayList<AccessionID> getAccessions() throws XPathExpressionException {
337337

338338
/**
339339
* Pull uniprot protein aliases associated with this sequence
340+
* Provided for backwards compatibility now that we support both
341+
* gene and protein aliases via separate methods.
340342
* @return
341343
* @throws XPathExpressionException
342344
*/
343345
public ArrayList<String> getAliases() throws XPathExpressionException {
346+
347+
return getProteinAliases();
348+
}
349+
/**
350+
* Pull uniprot protein aliases associated with this sequence
351+
* @return
352+
* @throws XPathExpressionException
353+
*/
354+
public ArrayList<String> getProteinAliases() throws XPathExpressionException {
344355
ArrayList<String> aliasList = new ArrayList<String>();
345356
if (uniprotDoc == null) {
346357
return aliasList;
@@ -353,6 +364,32 @@ public ArrayList<String> getAliases() throws XPathExpressionException {
353364
Element fullNameElement = XMLHelper.selectSingleElement(element, "fullName");
354365
aliasList.add(fullNameElement.getTextContent());
355366
}
367+
keyWordElementList = XMLHelper.selectElements(proteinElement, "recommendedName");
368+
for (Element element : keyWordElementList) {
369+
Element fullNameElement = XMLHelper.selectSingleElement(element, "fullName");
370+
aliasList.add(fullNameElement.getTextContent());
371+
}
372+
373+
return aliasList;
374+
}
375+
376+
/**
377+
* Pull uniprot gene aliases associated with this sequence
378+
* @return
379+
* @throws XPathExpressionException
380+
*/
381+
public ArrayList<String> getGeneAliases() throws XPathExpressionException {
382+
ArrayList<String> aliasList = new ArrayList<String>();
383+
if (uniprotDoc == null) {
384+
return aliasList;
385+
}
386+
Element uniprotElement = uniprotDoc.getDocumentElement();
387+
Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
388+
Element proteinElement = XMLHelper.selectSingleElement(entryElement, "gene");
389+
ArrayList<Element> keyWordElementList = XMLHelper.selectElements(proteinElement, "name");
390+
for (Element element : keyWordElementList) {
391+
aliasList.add(element.getTextContent());
392+
}
356393

357394
return aliasList;
358395
}

0 commit comments

Comments
 (0)