-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathSTLPreprocess.py
More file actions
86 lines (55 loc) · 1.93 KB
/
STLPreprocess.py
File metadata and controls
86 lines (55 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from numpy import concatenate
from math import sqrt
import pickle
from sklearn.preprocessing import MinMaxScaler
from minepy import MINE
import pandas as pd
from pandas import Series
from pandas import DataFrame
from matplotlib import pyplot
import statsmodels.api as sm
def stl(data):
    """Split the ``arrival`` series of *data* into seasonal-decomposition parts.

    The ``date`` column is parsed to datetimes (unparseable values become
    ``NaT``) and used as the index of the ``arrival`` series, which is then
    passed to ``statsmodels``' ``seasonal_decompose``.

    Args:
        data: DataFrame containing at least the columns ``date`` and
            ``arrival``. It is not modified.

    Returns:
        A tuple ``(residual, trend, seasonality)`` holding the corresponding
        decomposition components, each with its NaN entries dropped.
    """
    # Work on an explicit copy so that assigning the parsed dates neither
    # touches the caller's frame nor triggers pandas' chained-assignment
    # warning.
    series = data[['date', 'arrival']].copy()
    series['date'] = pd.to_datetime(series['date'], errors='coerce')
    series = series.set_index('date')

    res = sm.tsa.seasonal_decompose(series)
    residual = res.resid.dropna()
    trend = res.trend.dropna()
    seasonality = res.seasonal.dropna()

    # The previous implementation returned an undefined name (``to_drop``)
    # and therefore always raised NameError; hand back the computed parts.
    return residual, trend, seasonality
def Scaler(data):
    """Min-max scale every column of *data* into the range [0, 1].

    Args:
        data: DataFrame whose values can be cast to ``float32``.

    Returns:
        A tuple ``(scaler, scaledDf)``: the fitted ``MinMaxScaler`` and a new
        DataFrame of the scaled values. The new frame keeps the column labels
        of *data* but gets a fresh default index.
    """
    min_max = MinMaxScaler(feature_range=(0, 1))
    as_float = data.values.astype('float32')
    normalized = min_max.fit_transform(as_float)
    return min_max, pd.DataFrame(normalized, columns=data.columns)
def series_to_supervised(data):
    """Turn a feature table into sliding-window inputs and targets.

    Steps:

    1. Columns from position 4 onwards are taken as features, NaNs are
       replaced by 0 and every feature is min-max scaled.
    2. The scaled features are joined column-wise with ``arrival``,
       ``seasonality`` and ``trend`` (each divided by 1,000,000).
    3. Rows at positions 6..85 of that table are kept.
    4. Every run of 13 consecutive rows forms one sample: the first 12 rows
       are the input and column 77 of the 13th row is the target.

    The output shapes are fixed to 78 columns, so *data* must yield exactly
    78 combined columns (75 feature columns plus the three appended ones);
    in that layout column 77 is the scaled-down ``trend`` column.

    Args:
        data: DataFrame with the columns ``arrival``, ``seasonality`` and
            ``trend``, and the feature columns starting at position 4.
            Its index name is set to ``'date'`` as a side effect.

    Returns:
        A tuple ``(x_1, y_1)`` of NumPy arrays with shapes
        ``(n_samples, 12, 78)`` and ``(n_samples, 1)``.
    """
    seq_len = 13      # rows per window: 12 input steps + 1 target step
    n_columns = 78    # width of the combined table expected by the output
    target_col = 77   # column of the last window row used as the target

    data.index.name = 'date'

    # fillna returns a new frame, so the caller's data is not modified and
    # no in-place write is attempted on a slice.
    features = data.iloc[:, 4:].fillna(0)

    # ``MinMaxScaler`` is the name imported at the top of the file; the bare
    # ``sklearn`` package name is not in scope.
    scaler = MinMaxScaler()
    features_nor = pd.DataFrame(
        scaler.fit_transform(features),
        columns=features.columns,
        # Keep the original index so the column-wise concat below lines the
        # rows up with ``data`` even when its index is not 0..n-1.
        index=features.index,
    )

    combined = pd.concat(
        [
            features_nor,
            data['arrival'] / 1000000,
            data['seasonality'] / 1000000,
            data['trend'] / 1000000,
        ],
        axis=1,
    )
    combined = combined.iloc[6:86]

    # ``DataFrame.as_matrix`` was removed in pandas 1.0; ``.values`` yields
    # the same NumPy array on old and new versions.
    matrix = combined.values

    n_samples = max(len(matrix) - seq_len, 0)
    x_1 = np.zeros((n_samples, seq_len - 1, n_columns))
    y_1 = np.zeros((n_samples, 1))
    for start in range(n_samples):
        window = matrix[start:start + seq_len]
        x_1[start] = window[:seq_len - 1]
        y_1[start] = window[seq_len - 1][target_col]
    return x_1, y_1