-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathProgram.cs
More file actions
126 lines (96 loc) · 4.96 KB
/
Program.cs
File metadata and controls
126 lines (96 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
namespace DataScraper
{
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Threading.Tasks;
using HtmlAgilityPack;
using Newtonsoft.Json;
/// <summary>
/// One external link shown on a speaker's profile, serialized as part of
/// <c>speakers.json</c>.
/// </summary>
public class Link
{
    /// <summary>
    /// Value of the <c>class</c> attribute of the link's <c>&lt;i&gt;</c> icon element.
    /// </summary>
    public string Icon
    {
        get;
        set;
    }

    /// <summary>
    /// Value of the <c>href</c> attribute of the link's anchor element.
    /// </summary>
    public string Url
    {
        get;
        set;
    }
}
/// <summary>
/// A single conference session given by a speaker, serialized as part of
/// <c>speakers.json</c>.
/// </summary>
public class Session
{
    /// <summary>
    /// Numeric identifier parsed from the last path segment of the session URL.
    /// </summary>
    public int Id
    {
        get;
        set;
    }

    /// <summary>
    /// Text of the session link as shown on the speaker page.
    /// </summary>
    public string Title
    {
        get;
        set;
    }

    /// <summary>
    /// Inner HTML of the session page's <c>abstract</c> element.
    /// </summary>
    public string Abstract
    {
        get;
        set;
    }
}
/// <summary>
/// A conference speaker together with their profile links and sessions; the
/// root element type of the serialized <c>speakers.json</c> array.
/// </summary>
public class Speaker
{
    /// <summary>
    /// Numeric identifier parsed from the last path segment of the speaker page URL.
    /// Also used as the file name of the downloaded portrait.
    /// </summary>
    public int Id
    {
        get;
        set;
    }

    /// <summary>
    /// Speaker name, taken from the text node of the profile heading.
    /// </summary>
    public string Name
    {
        get;
        set;
    }

    /// <summary>
    /// Concatenated inner HTML of the profile's paragraph elements.
    /// </summary>
    public string Bio
    {
        get;
        set;
    }

    /// <summary>
    /// External links listed on the speaker's profile.
    /// </summary>
    public IEnumerable<Link> Links
    {
        get;
        set;
    }

    /// <summary>
    /// Sessions listed on the speaker's profile.
    /// </summary>
    public IEnumerable<Session> Sessions
    {
        get;
        set;
    }
}
/// <summary>
/// Console tool that scrapes the conference agenda, follows every speaker link,
/// downloads each speaker's portrait and writes all speakers (with their links
/// and sessions) to <c>speakers.json</c> in the output directory.
/// </summary>
public class Program
{
    /// <summary>Site-relative path of the agenda page that lists the speaker links.</summary>
    private const string AgendaUrl = "/Agenda";

    /// <summary>Base address that all relative page URLs are resolved against.</summary>
    private const string SiteRoot = "http://www.dddeastanglia.com";

    /// <summary>Output directory used when none is supplied on the command line.</summary>
    private const string DefaultOutputDir =
        "D:\\Code\\BeyondResponsiveDesign\\BeyondResponsiveDesign.Menus\\images\\speakers";

    /// <summary>
    /// Runs the scrape and waits for a key press before exiting.
    /// </summary>
    /// <param name="args">
    /// Optional. When the first argument is a non-blank string it is used as the
    /// output directory; otherwise the built-in default directory is used.
    /// </param>
    public static void Main(string[] args)
    {
        var outputDir = args != null && args.Length > 0 && !string.IsNullOrWhiteSpace(args[0])
            ? args[0]
            : DefaultOutputDir;

        // No-op when the directory already exists; avoids failing on the first file write.
        Directory.CreateDirectory(outputDir);

        Speaker[] speakers;
        using (var imageClient = new HttpClient())
        using (var htmlClient = new HttpClient { BaseAddress = new Uri(SiteRoot) })
        {
            // Main is synchronous, so block exactly once here on the whole async pipeline.
            // GetAwaiter().GetResult() rethrows the original exception instead of wrapping
            // it in an AggregateException.
            speakers = ScrapeSpeakersAsync(htmlClient, imageClient, outputDir).GetAwaiter().GetResult();
        }

        File.WriteAllText(
            Path.Combine(outputDir, "speakers.json"),
            JsonConvert.SerializeObject(speakers.OrderBy(o => o.Name)));

        Console.WriteLine("Process completed, press any key to exit");
        Console.ReadKey();
    }

    /// <summary>
    /// Loads the agenda page and scrapes every distinct speaker it links to, concurrently.
    /// </summary>
    /// <param name="htmlClient">Client whose base address is the conference site.</param>
    /// <param name="imageClient">Client used to download portrait images.</param>
    /// <param name="outputDir">Directory that receives the portrait files.</param>
    /// <returns>One <see cref="Speaker"/> per distinct speaker link on the agenda.</returns>
    private static async Task<Speaker[]> ScrapeSpeakersAsync(
        HttpClient htmlClient,
        HttpClient imageClient,
        string outputDir)
    {
        var agenda = new HtmlDocument();
        agenda.LoadHtml(await htmlClient.GetStringAsync(AgendaUrl));

        // A speaker link repeated on the agenda would otherwise produce a duplicate
        // JSON entry and two tasks writing the same image file at once.
        var speakerHrefs = agenda.DocumentNode.Descendants("a")
            .Where(o => o.Attributes.Contains("class") && o.Attributes["class"].Value == "speakerName")
            .Where(o => o.Attributes.Contains("href"))
            .Select(o => o.Attributes["href"].Value)
            .Distinct()
            .ToList();

        return await Task.WhenAll(
            speakerHrefs.Select(href => ScrapeSpeakerAsync(htmlClient, imageClient, href, outputDir)));
    }

    /// <summary>
    /// Scrapes one speaker page, including each of its session pages, and downloads
    /// the speaker's portrait to <c>{id}.jpg</c> in <paramref name="outputDir"/>.
    /// </summary>
    /// <param name="htmlClient">Client whose base address is the conference site.</param>
    /// <param name="imageClient">Client used to download the portrait image.</param>
    /// <param name="href">Speaker page URL; its last path segment must be the numeric speaker id.</param>
    /// <param name="outputDir">Directory that receives the portrait file.</param>
    /// <returns>The populated <see cref="Speaker"/>.</returns>
    private static async Task<Speaker> ScrapeSpeakerAsync(
        HttpClient htmlClient,
        HttpClient imageClient,
        string href,
        string outputDir)
    {
        var speakerPage = new HtmlDocument();
        speakerPage.LoadHtml(await htmlClient.GetStringAsync(href));

        var container = speakerPage.DocumentNode.Descendants("div")
            .First(o => o.Attributes.Contains("class") && o.Attributes["class"].Value.Contains("speaker"));
        var titleElement = container.Descendants("h3").First();

        var id = int.Parse(href.Split('/').Last());
        var name = titleElement.Element("#text").InnerHtml;
        var bio = string.Join(string.Empty, container.Elements("p").Select(o => o.InnerHtml));

        var links = container.Descendants("section").First().Descendants("p")
            .Select(o => new Link
            {
                Icon = o.Descendants("i").First().Attributes["class"].Value,
                Url = o.Descendants("a").First().Attributes["href"].Value
            })
            .ToList();

        // Element("ul") returns null when the profile has no session list; treat that
        // as "no sessions" rather than failing the whole run.
        var sessions = new List<Session>();
        var sessionList = container.Element("ul");
        if (sessionList != null)
        {
            foreach (var sessionLink in sessionList.Descendants("a"))
            {
                sessions.Add(await ScrapeSessionAsync(htmlClient, sessionLink));
            }
        }

        // The heading image is requested at 50px on the page; ask for the 300px variant.
        var imageTag = titleElement.Descendants("img").First();
        var imageUrl = imageTag.Attributes["src"].Value.Replace("s=50", "s=300");
        await DownloadImageAsync(imageClient, imageUrl, Path.Combine(outputDir, id + ".jpg"));

        return new Speaker { Id = id, Name = name, Links = links, Bio = bio, Sessions = sessions };
    }

    /// <summary>
    /// Follows a session link from a speaker page and reads the session's abstract.
    /// </summary>
    /// <param name="htmlClient">Client whose base address is the conference site.</param>
    /// <param name="sessionLink">Anchor element whose <c>href</c> ends in the numeric session id.</param>
    /// <returns>The populated <see cref="Session"/>.</returns>
    private static async Task<Session> ScrapeSessionAsync(HttpClient htmlClient, HtmlNode sessionLink)
    {
        var sessionHref = sessionLink.Attributes["href"].Value;

        var sessionPage = new HtmlDocument();
        sessionPage.LoadHtml(await htmlClient.GetStringAsync(sessionHref));

        var abstractElement = sessionPage.DocumentNode.Descendants("div")
            .First(o => o.Attributes.Contains("class") && o.Attributes["class"].Value == "abstract");

        return new Session
        {
            Id = int.Parse(sessionHref.Split('/').Last()),
            Title = sessionLink.InnerText,
            Abstract = abstractElement.InnerHtml
        };
    }

    /// <summary>
    /// Downloads <paramref name="imageUrl"/> to <paramref name="path"/>, replacing any existing file.
    /// </summary>
    /// <param name="imageClient">Client used for the download.</param>
    /// <param name="imageUrl">URL of the image to fetch.</param>
    /// <param name="path">Destination file path.</param>
    private static async Task DownloadImageAsync(HttpClient imageClient, string imageUrl, string path)
    {
        using (var source = await imageClient.GetStreamAsync(imageUrl))
        // File.Create truncates an existing file. File.OpenWrite would leave stale
        // trailing bytes behind when the new image is smaller than the old one.
        using (var target = File.Create(path))
        {
            await source.CopyToAsync(target);
        }
    }
}
}