We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 61de19b, commit dc43586 (Copy full SHA for dc43586)
1 file changed
experiments/web.py
@@ -0,0 +1,30 @@
1
+from bs4 import BeautifulSoup
2
+import csv
3
+import json
4
+import requests
5
+
6
7
def main(url='http://yahoo.com', output_path='headlines-output.csv',
         min_length=10, timeout=10):
    """Scrape ``<h3>`` headlines from a page, print them as JSON, save as CSV.

    Args:
        url: Address of the page to fetch.
        output_path: Path of the CSV file to (over)write. The file gets a
            single ``headline`` column with one row per headline.
        min_length: Headlines whose stripped text is shorter than this many
            characters are skipped.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout requests waits forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of scraping an error page as if it were content.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    headlines = []
    for tag in soup.find_all("h3"):
        text = tag.get_text().strip()
        if len(text) < min_length:
            continue
        headlines.append(text)

    print(json.dumps(headlines))

    # newline='' is required by the csv module (otherwise extra blank rows
    # appear on Windows); an explicit encoding keeps non-ASCII headlines from
    # failing under a non-UTF-8 locale.
    with open(output_path, 'w', newline='', encoding='utf-8') as out_file:
        writer = csv.writer(out_file, delimiter=',')
        writer.writerow(['headline'])
        writer.writerows([text] for text in headlines)
28
29
# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
0 commit comments