Skip to content

Commit 6f3f151

Browse files
committed
added one-hot encoding
1 parent 1a640a9 commit 6f3f151

2 files changed

Lines changed: 51 additions & 0 deletions

File tree

File renamed without changes.

processing_categorical.R

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
### functions for categorical data processing
2+
# 1. One-hot encoding categorical features
3+
4+
5+
## loading libraries
6+
library(dummies)
7+
library(plyr)
8+
9+
10+
## function for one-hot encoding categorical features
11+
#' One-hot encode the categorical columns of a train/test pair
#'
#' Stacks `X_train` and `X_test` into a single panel so both receive the same
#' set of dummy columns, expands every character/factor column with
#' `dummy.data.frame()`, sanitises the resulting column names, and splits the
#' panel back into its train and test parts.
#'
#' @param X_train Data frame of training rows.
#' @param X_test Optional data frame of test rows; must be `rbind()`-compatible
#'   with `X_train`. Defaults to an empty data frame (no test set).
#' @return An unnamed list of length two: the encoded training data frame and
#'   the encoded test data frame. When no test rows were supplied the second
#'   element is a zero-row data frame with the same columns as the first.
onehot_encode_categories <- function(X_train, X_test = data.frame()) {
  # Row counts are captured once, before anything is reassigned, so the final
  # split cannot be affected by later modifications.
  n_train <- nrow(X_train)
  n_test <- nrow(X_test)

  # creating panel
  cat("Creating panel\n")

  if (n_test > 0) {
    panel <- rbind(X_train, X_test)
  } else {
    panel <- X_train
  }

  # extracting categorical columns
  # is.character()/is.factor() give exactly one TRUE/FALSE per column even for
  # columns with several classes (e.g. ordered factors), unlike
  # `class(x) %in% ...`, and vapply() copes with a zero-column panel.
  is_categorical <- vapply(
    panel,
    function(column) is.character(column) || is.factor(column),
    logical(1)
  )
  categorical_columns <- names(panel)[is_categorical]

  # creating dummy variables
  cat("One-hot encoding the categorical variables")

  if (length(categorical_columns) > 0) {
    panel <- dummy.data.frame(panel, names = categorical_columns, sep = "_")
    # Column-name clean-up, applied to every column of the panel: drop all
    # punctuation (this includes underscores), squeeze whitespace runs to a
    # single space, then turn spaces into underscores.
    colnames(panel) <- gsub("[[:punct:]]", "", colnames(panel))
    colnames(panel) <- gsub("[[:space:]]+", " ", colnames(panel))
    colnames(panel) <- gsub(" ", "_", colnames(panel))
  }

  # drop = FALSE keeps a data frame even when the panel has a single column.
  train_out <- panel[seq_len(n_train), , drop = FALSE]

  if (n_test > 0) {
    test_out <- panel[n_train + seq_len(n_test), , drop = FALSE]
  } else {
    # `(n_train + 1):nrow(panel)` would count downwards here and pick up an
    # NA row plus the last training row; return an empty frame instead.
    test_out <- panel[0, , drop = FALSE]
  }

  list(train_out, test_out)
}
51+

0 commit comments

Comments
 (0)