File tree Expand file tree Collapse file tree 1 file changed +17
-6
lines changed
Expand file tree Collapse file tree 1 file changed +17
-6
lines changed Original file line number Diff line number Diff line change 1010
1111
def crawl(url, timeout=10):
    """
    Crawls a page and collects the links found on it.

    Arguments:
    - url: URL of the page to crawl
    - timeout: seconds passed to requests.get as its timeout, so an
      unresponsive server cannot block the crawl forever (default 10)

    Return:
    - List of the unique absolute links found, in first-seen order.
      An empty list is returned when the response status is not 200.

    Errors raised by requests.get (connection failure, timeout, ...) are
    not caught here and propagate to the caller.
    """
    req = requests.get(url, timeout=timeout)

    # Check if successful
    if req.status_code != 200:
        return []

    # Resolve every match against the page URL *before* de-duplicating:
    # different relative spellings can point to the same absolute URL.
    # dict.fromkeys keeps first-seen order, so the result is deterministic
    # (iterating a set is not).
    found_link = list(dict.fromkeys(
        urljoin(url, link) for link in link_re.findall(req.text)
    ))

    print("\n Found {} unique links".format(len(found_link)))

    # Echo each absolute link that was collected
    for link in found_link:
        print(link)

    return found_link
3242
if __name__ == '__main__':
    # Ask for the start page instead of hard-coding it; strip surrounding
    # whitespace so a stray space or newline does not end up in the URL.
    url = input("Enter a url to crawl: ").strip()
    crawl(url)
You can’t perform that action at this time.
0 commit comments