-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPenguin_Network_Code.R
More file actions
145 lines (115 loc) · 4.36 KB
/
Penguin_Network_Code.R
File metadata and controls
145 lines (115 loc) · 4.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
######################################################
## Using Networks to Visualize relationships
## Written By RProDigest X handle @RProDigest
## Date 23rd Sept., 2023
## Dataset: Palmerpenguins
## All attributions acknowledged for use of palmerpenguins & R packages
## Tidyverse, Magrittr, visNetwork, htmlwidgets and Broom
## Historical Fact: Leonhard Euler solved the Seven Bridges of Königsberg problem
## in 1736. Königsberg is now Kaliningrad, Russia.
##
## References:
## 1. https://datastorm-open.github.io/visNetwork/legend.html
## 2. https://datastorm-open.github.io/visNetwork/options.html
#######################################################
# Load and install libraries using the pacman package manager
if (!require(pacman)) install.packages("pacman")
pacman::p_load(
tidyverse, # For data manipulation and visualization
visNetwork, # For network visualization
tidymodels, # For data preprocessing
htmlwidgets, # For creating R bindings to JavaScript
magrittr, # Required for magrittr pipes
palmerpenguins, # Access to the Penguins dataset
showtext # For customized fonts
)
# Register the Roboto Google font and switch on showtext rendering
font_add_google(name = "Roboto", family = "Roboto")
showtext_auto()

# Take a working copy of the penguins data and print its structure
pengs <- palmerpenguins::penguins
glimpse(pengs)

# Integer-code the species factor (this is a label encoding, not a one-hot
# encoding): as.numeric() on a factor returns the level index.
## Species codes: Adelie = 1, Chinstrap = 2, Gentoo = 3
pengs$species <- as.numeric(pengs$species)

# Keep the rows whose sex is not "female" (rows with a missing sex are
# dropped by the comparison as well), discard the columns that should not
# enter the distance calculation, then remove any remaining incomplete rows.
penguins_cleaned <- pengs |>
  filter(sex != "female") |>
  select(-sex, -island, -year) |>
  drop_na()
# Pairwise distances and similarities
# Distances are computed with stats::dist() and its default settings over
# every column of `penguins_cleaned` (the integer-coded species included),
# using the columns on their raw scales. Rows are not de-duplicated here, so
# identical rows simply end up with a distance of 0.
# The distances are then rescaled by their maximum so they lie in [0, 1].
penguins_cleaned_distance <- penguins_cleaned |>
  stats::dist() |>
  (\(d) d / max(d))()

# Similarity is the complement of the normalized distance
penguin_similarity <- 1 - penguins_cleaned_distance
# Create the network edges
# Flatten the similarity `dist` object into a long table with one row per
# pair of penguins (two label columns followed by the similarity value).
# NOTE(review): broom documents its tidier for `dist` objects as deprecated;
# confirm it is still available in the installed broom version.
penguin_edges <- penguin_similarity |>
  tidy()

# visNetwork expects the edge endpoints in `from`/`to`; `value` drives the
# edge width.
colnames(penguin_edges) <- c("from", "to", "value")

# Keep only edges stronger than the median similarity, order them from the
# strongest down, and retain at most the top 500 (matching the plot title).
# head() is used instead of `[1:500, ]` so that a table with fewer than 500
# rows is returned as-is rather than being padded with all-NA rows.
penguin_edges <- penguin_edges |>
  filter(value > median(value)) |>
  arrange(desc(value)) |>
  head(500)
# Create the network nodes
# Every penguin that appears at either end of a retained edge becomes a node.
# Ids are collected as character, in order of first appearance in `from`
# and then `to`, with repeats removed.
penguin_nodes <- data.frame(
  id = unique(c(
    as.character(penguin_edges$from),
    as.character(penguin_edges$to)
  ))
)

# Species lookup used for node colouring and the legend.
# Node ids are matched against row positions, so this lookup must contain
# exactly the same rows, in the same order, as `penguins_cleaned`. It
# therefore repeats the same cleaning steps (same filter, same dropped
# columns *before* drop_na()) instead of running drop_na() over a different
# set of columns, which could silently shift the positions.
penguin_species <- palmerpenguins::penguins |>
  filter(sex != "female") |>
  select(-sex, -island, -year) |>
  drop_na() |>
  select(species) |>
  mutate(id = as.character(row_number()))

# Attach the species to each node and expose it under the name `group`,
# which is the column visNetwork uses to assign nodes to groups.
penguin_nodes <- penguin_nodes |>
  left_join(penguin_species, by = "id") |>
  rename(group = species)
# Create the network
# Build the interactive widget from the node and edge tables, colour the
# three species groups, add a legend on the right, and fix the layout seed
# so the picture is reproducible between runs.
# `improvedLayout` is a visLayout() option, not a visNetwork() argument:
# passed to visNetwork() it only lands in `...` and is ignored, so it is
# set in visLayout() alongside the seed.
penguin_network <- visNetwork(
  penguin_nodes,
  penguin_edges,
  main = list(
    text = "Palmer Penguins (Male) Network\nTop 500 Network Connections",
    style = "font-family:Roboto;text-align:center;font-weight:bold;font-size:18px"
  ),
  submain = list(
    text = "@RProDigest (X Handle)",
    style = "font-family:Roboto;text-align:center;font-weight:bold"
  )
) |>
  visGroups(groupname = "Adelie", color = "#D55E00") |>
  visGroups(groupname = "Chinstrap", color = "#0072B2") |>
  visGroups(groupname = "Gentoo", color = "#009E73") |>
  visLegend(
    width = 0.1,
    position = "right",
    main = list(
      text = "Penguins",
      style = "font-family:Roboto;text-align:center;font-weight:bold"
    ),
    stepY = 100,
    stepX = 50
  ) |>
  visLayout(randomSeed = 2023, improvedLayout = TRUE) |>
  visOptions(highlightNearest = TRUE)

# Display the network
penguin_network