Skip to content

Commit 0a92072

Browse files
committed
Merge pull request AlchemyAPI#1 from lineback/master
added new calls for combined, image, and taxonomy
2 parents cde4659 + b900060 commit 0a92072

File tree

2 files changed

+246
-1
lines changed

2 files changed

+246
-1
lines changed

alchemyapi.py

Lines changed: 147 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,16 @@ class AlchemyAPI:
112112
ENDPOINTS['microformats'] = {}
113113
ENDPOINTS['microformats']['url'] = '/url/URLGetMicroformatData'
114114
ENDPOINTS['microformats']['html'] = '/html/HTMLGetMicroformatData'
115-
115+
ENDPOINTS['combined'] = {}
116+
ENDPOINTS['combined']['url'] = '/url/URLGetCombinedData'
117+
ENDPOINTS['combined']['text'] = '/text/TextGetCombinedData'
118+
ENDPOINTS['image'] = {}
119+
ENDPOINTS['image']['url'] = '/url/URLGetImage'
120+
ENDPOINTS['taxonomy'] = {}
121+
ENDPOINTS['taxonomy']['url'] = '/url/URLGetRankedTaxonomy'
122+
ENDPOINTS['taxonomy']['html'] = '/html/HTMLGetRankedTaxonomy'
123+
ENDPOINTS['taxonomy']['text'] = '/text/TextGetRankedTaxonomy'
124+
116125
#The base URL for all endpoints
117126
BASE_URL = 'http://access.alchemyapi.com/calls'
118127

@@ -570,7 +579,144 @@ def microformats(self, flavor, data, options={}):
570579
options[flavor] = data
571580
return self.__analyze(AlchemyAPI.ENDPOINTS['microformats'][flavor], options)
572581

582+
def imageExtraction(self, flavor, data, options=None):
    """
    Extracts the main image from a URL.

    INPUT:
    flavor -> which version of the call (url only currently).
    data -> URL to analyze
    options -> various parameters that can be used to adjust how the API works,
    see below for more info on the available options. May be omitted (None).
    The dict passed in is not modified.

    Available Options:
    extractMode ->
        trust-metadata : (less CPU intensive, less accurate)
        always-infer : (more CPU intensive, more accurate)

    OUTPUT:
    The response, already converted from JSON to a Python object.
    If the flavor is not supported, an error dict with 'status' and
    'statusInfo' keys is returned instead.
    """
    if flavor not in AlchemyAPI.ENDPOINTS['image']:
        return { 'status':'ERROR', 'statusInfo':'image extraction for ' + flavor + ' not available' }

    # Build a fresh dict per call. The previous signature used a mutable
    # default ({}) and wrote into it, so data from one call leaked into
    # the next and the caller's own dict was modified.
    params = dict(options) if options else {}
    params[flavor] = data
    return self.__analyze(AlchemyAPI.ENDPOINTS['image'][flavor], params)
603+
604+
def taxonomy(self, flavor, data, options=None):
    """
    Taxonomy classification operations.

    INPUT:
    flavor -> which version of the call, i.e. url, html or text.
    data -> the data to analyze, either the url, the html code or the text.
    options -> various parameters that can be used to adjust how the API works,
    see below for more info on the available options. May be omitted (None).
    The dict passed in is not modified.

    Available Options:
    showSourceText ->
        include the original 'source text' the taxonomy categories were extracted from within the API response
        Possible values:
            1 - enabled
            0 - disabled (default)

    sourceText ->
        where to obtain the text that will be processed by this API call.

        AlchemyAPI supports multiple modes of text extraction:
            web page cleaning (removes ads, navigation links, etc.), raw text extraction
            (processes all web page text, including ads / nav links), visual constraint queries, and XPath queries.

        Possible values:
            cleaned_or_raw : cleaning enabled, fallback to raw when cleaning produces no text (default)
            cleaned : operate on 'cleaned' web page text (web page cleaning enabled)
            raw : operate on raw web page text (web page cleaning disabled)
            cquery : operate on the results of a visual constraints query
                Note: The 'cquery' http argument must also be set to a valid visual constraints query.
            xpath : operate on the results of an XPath query
                Note: The 'xpath' http argument must also be set to a valid XPath query.

    cquery ->
        a visual constraints query to apply to the web page.

    xpath ->
        an XPath query to apply to the web page.

    baseUrl ->
        rel-tag output base http url (must be uri-argument encoded)

    OUTPUT:
    The response, already converted from JSON to a Python object.
    If the flavor is not supported, an error dict with 'status' and
    'statusInfo' keys is returned instead.
    """
    if flavor not in AlchemyAPI.ENDPOINTS['taxonomy']:
        return { 'status':'ERROR', 'statusInfo':'taxonomy for ' + flavor + ' not available' }

    # Build a fresh dict per call. The previous signature used a mutable
    # default ({}) and wrote into it, so e.g. a 'url' entry from one call
    # was still present on a later 'text' call, and the caller's own dict
    # was modified.
    params = dict(options) if options else {}
    params[flavor] = data
    return self.__analyze(AlchemyAPI.ENDPOINTS['taxonomy'][flavor], params)
654+
655+
def combined(self, flavor, data, options=None):
    """
    Combined call for page-image, entity, keyword, title, author, taxonomy, concept.

    INPUT:
    flavor -> which version of the call, i.e. url or text.
    data -> the data to analyze, either the url or the text.
    options -> various parameters that can be used to adjust how the API works,
    see below for more info on the available options. May be omitted (None).
    The dict passed in is not modified.

    Available Options:
    extract ->
        Possible values: page-image, entity, keyword, title, author, taxonomy, concept
        default : entity, keyword, taxonomy, concept

    disambiguate ->
        disambiguate detected entities
        Possible values:
            1 : enabled (default)
            0 : disabled

    linkedData ->
        include Linked Data content links with disambiguated entities
        Possible values :
            1 : enabled (default)
            0 : disabled

    coreference ->
        resolve he/she/etc coreferences into detected entities
        Possible values:
            1 : enabled (default)
            0 : disabled

    quotations ->
        enable quotations extraction
        Possible values:
            1 : enabled
            0 : disabled (default)

    sentiment ->
        enable entity-level sentiment analysis
        Possible values:
            1 : enabled
            0 : disabled (default)

    showSourceText ->
        include the original 'source text' the entities were extracted from within the API response
        Possible values:
            1 : enabled
            0 : disabled (default)

    maxRetrieve ->
        maximum number of named entities to extract
        default : 50

    baseUrl ->
        rel-tag output base http url

    OUTPUT:
    The response, already converted from JSON to a Python object.
    If the flavor is not supported, an error dict with 'status' and
    'statusInfo' keys is returned instead.
    """
    if flavor not in AlchemyAPI.ENDPOINTS['combined']:
        return { 'status':'ERROR', 'statusInfo':'combined for ' + flavor + ' not available' }

    # Build a fresh dict per call. The previous signature used a mutable
    # default ({}) and wrote into it, so e.g. a 'url' entry from one call
    # was still present on a later 'text' call, and the caller's own dict
    # was modified.
    params = dict(options) if options else {}
    params[flavor] = data
    return self.__analyze(AlchemyAPI.ENDPOINTS['combined'][flavor], params)
574720

575721
def __analyze(self, endpoint, params):
576722
"""

example.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,105 @@
447447
print('')
448448

449449

450+
# --- Image Extraction example -------------------------------------------
# Prints a banner, asks the API for the main image of demo_url and shows
# either the raw response plus the image value, or the reported error.
for banner_line in (
    '',
    '',
    '',
    '############################################',
    '# Image Extraction Example #',
    '############################################',
    '',
    '',
):
    print(banner_line)

print('Processing url: ', demo_url)
print('')

response = alchemyapi.imageExtraction('url', demo_url)

if response['status'] != 'OK':
    # Failure path: only the status message is shown.
    print('Error in image extraction call: ', response['statusInfo'])
else:
    print('## Response Object ##')
    print(json.dumps(response, indent=4))

    print('')
    print('## Image ##')
    print('Image: ', response['image'])
    print('')

# Trailing spacing before the next example section.
for _ in range(2):
    print('')
478+
479+
480+
# --- Taxonomy example ----------------------------------------------------
# Prints a banner, classifies demo_text and lists each returned category
# label with its score, or shows the reported error.
for banner_line in (
    '',
    '',
    '',
    '############################################',
    '# Taxonomy Example #',
    '############################################',
    '',
    '',
):
    print(banner_line)

print('Processing text: ', demo_text)
print('')

response = alchemyapi.taxonomy('text', demo_text)

if response['status'] != 'OK':
    # Failure path: only the status message is shown.
    print('Error in taxonomy call: ', response['statusInfo'])
else:
    print('## Response Object ##')
    print(json.dumps(response, indent=4))

    print('')
    print('## Categories ##')
    for category in response['taxonomy']:
        label, score = category['label'], category['score']
        print(label, ' : ', score)
    print('')

# Trailing spacing before the next example section.
for _ in range(2):
    print('')
509+
510+
511+
# --- Combined example ----------------------------------------------------
# Prints a banner, runs the combined call on demo_text and lists the
# keywords, concepts and entities from the response, or shows the
# reported error.
for banner_line in (
    '',
    '',
    '',
    '############################################',
    '# Combined Example #',
    '############################################',
    '',
    '',
):
    print(banner_line)

print('Processing text: ', demo_text)
print('')

response = alchemyapi.combined('text', demo_text)

if response['status'] != 'OK':
    # Failure path: only the status message is shown.
    print('Error in combined call: ', response['statusInfo'])
else:
    print('## Response Object ##')
    print(json.dumps(response, indent=4))

    print('')

    print('## Keywords ##')
    for keyword in response['keywords']:
        kw_text, kw_relevance = keyword['text'], keyword['relevance']
        print(kw_text, ' : ', kw_relevance)
    print('')

    print('## Concepts ##')
    for concept in response['concepts']:
        concept_text, concept_relevance = concept['text'], concept['relevance']
        print(concept_text, ' : ', concept_relevance)
    print('')

    print('## Entities ##')
    for entity in response['entities']:
        entity_type = entity['type']
        entity_text = entity['text']
        entity_relevance = entity['relevance']
        print(entity_type, ' : ', entity_text, ', ', entity_relevance)
    # The original prints a single space here, not an empty string.
    print(' ')

# Trailing spacing at the end of the section.
for _ in range(2):
    print('')

0 commit comments

Comments
 (0)