File tree Expand file tree Collapse file tree
__other__/pubmed - download and textract Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ # author: Bartlomiej "furas" Burek (https://blog.furas.pl)
2+ # date: 2022.07.19
3+ # [python - getting weird results from metapub and pubmed - Stack Overflow](https://stackoverflow.com/questions/73043246/getting-weird-results-from-metapub-and-pubmed/)
4+
5+ import os
6+ from urllib .request import urlretrieve
7+ import metapub
8+ import textract
9+
# Directory that receives the downloaded PDFs and the extracted text files.
#another_path = '/content/Articles/'
another_path = './'

# PubMed IDs of the articles to fetch.
pmid_list = ['35566889', '33538053', '30848212']


def _download_and_extract(pmid, directory):
    """Download the PDF for one PubMed ID and save its text next to it.

    Writes ``<directory>/<pmid>.pdf`` and ``<directory>/<pmid>.txt``.

    Returns True when both files were written and False when metapub
    found no URL for the article. Errors raised by the lookup, the
    download or the text extraction propagate to the caller.
    """
    source = metapub.FindIt(pmid)
    url = source.url
    print('url:', url)

    if not url:
        # FindIt records why it could not produce a link; report it
        # instead of skipping the article without any explanation.
        print('... skipped:', source.reason)
        return False

    out_file = os.path.join(directory, pmid)
    print('out_file:', out_file)
    pdf_path = out_file + '.pdf'

    print('... downloading')
    urlretrieve(url, pdf_path)

    print('... processing')
    data = textract.process(pdf_path, extension='pdf', method='pdftotext', encoding="utf_8")

    print('... saving')
    with open(out_file + '.txt', "wb") as textfile:  # save bytes
        textfile.write(data)

    print('... OK')
    return True


# Make sure the target directory exists before the first download.
os.makedirs(another_path, exist_ok=True)

for query in pmid_list:

    print('query:', query)

    try:
        # The lookup is inside the `try` as well, so a failure for one
        # article does not abort the remaining ones.
        _download_and_extract(query, another_path)
    except Exception as ex:
        # Best-effort batch: report the problem and move on to the next PMID.
        print('Exception:', ex)
45+
You can’t perform that action at this time.
0 commit comments