241030

딥러닝

In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense, Input
from keras.backend import clear_session
from keras.optimizers import Adam

In [2]:

path = 'https://raw.githubusercontent.com/DA4BAM/dataset/master/advertising.csv'
adv = pd.read_csv(path)
adv.head()

Out[2]:

TVRadioNewspaperSales01234

230.1	37.8	69.2	22.1
44.5	39.3	45.1	10.4
17.2	45.9	69.3	9.3
151.5	41.3	58.5	18.5
180.8	10.8	58.4	12.9

In [3]:

target = 'Sales'
x = adv.drop(columns = target)
y = adv.loc[:, target]

In [4]:

# 데이터 분할
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state = 1)

# scaling
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)

In [5]:

# feature 개수
nfeatures = x_train.shape[1]

# 메모리 정리
clear_session()

# Sequential 모델
model = Sequential([Input(shape = (nfeatures,)),
                    Dense(1)])

# 모델 확인
model.summary()

In [6]:

# compile
model.compile(optimizer = 'adam', loss = 'mse')

# 학습
model.fit(x_train, y_train)

# 예측
y_pred = model.predict(x_val)

# 검증
print(mean_absolute_error(y_val, y_pred))
print(mean_absolute_percentage_error(y_val, y_pred))
print(root_mean_squared_error(y_val, y_pred))

5/5 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 210.2498
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 109ms/step
15.052200520802291
1.0141108422779446
15.64066154625038

In [10]:

# compile
model.compile(optimizer = Adam(learning_rate = 0.001), loss = 'mse')    # learning_rate default 값은 0.001

# 학습
model.fit(x_train, y_train, epochs = 50, validation_split= 0.2, verbose = 0)    # epochs default 값은 1, validation_split은 0.0

# 예측
y_pred = model.predict(x_val)

# 검증
print(mean_absolute_error(y_val, y_pred))
print(mean_absolute_percentage_error(y_val, y_pred))
print(root_mean_squared_error(y_val, y_pred))

2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step
14.1109373408556
0.9482398194975982
14.696283366817203

그래프 그리기

In [11]:

path = 'https://raw.githubusercontent.com/DA4BAM/dataset/master/Carseats.csv'
data = pd.read_csv(path)
data.head()

Out[11]:

SalesCompPriceIncomeAdvertisingPopulationPriceShelveLocAgeEducationUrbanUS01234

9.50	138	73	11	276	120	Bad	42	17	Yes	Yes
11.22	111	48	16	260	83	Good	65	10	Yes	Yes
10.06	113	35	10	269	80	Medium	59	12	Yes	Yes
7.40	117	100	4	466	97	Medium	55	14	Yes	Yes
4.15	141	64	3	340	128	Bad	38	13	Yes	No

In [12]:

# 데이터 분리
target = 'Sales'
x = data.drop(target, axis=1)
y = data.loc[:, target]

# 가변수화
cols = ['ShelveLoc', 'Education', 'US', 'Urban']
x = pd.get_dummies(x, columns = cols, drop_first = True)

# 데이터 분할
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size= 0.2, random_state = 1)

# scaling
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)

In [22]:

# feature 개수
nfeatures = x_train.shape[1]

# 메모리 정리
clear_session()

# Sequential 모델
model2 = Sequential([Input(shape = (nfeatures,)),
                     Dense(1)])

# 모델 확인
model2.summary()

In [23]:

# compile
model2.compile(optimizer = Adam(learning_rate = 0.1), loss = 'mse')

# 학습
history = model2.fit(x_train, y_train, epochs = 30, validation_split = 0.2, verbose = 0).history

# 예측
y_pred2 = model2.predict(x_val)

# 검증
print(mean_absolute_error(y_val, y_pred2))
print(mean_absolute_percentage_error(y_val, y_pred2))
print(root_mean_squared_error(y_val, y_pred2))

3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step
1.7647960083484648
284087497942631.1
2.153965030346456

In [24]:

plt.figure(figsize = (10,5))
plt.plot(history['loss'], label = 'train_err', marker = '.')
plt.plot(history['val_loss'], label = 'val_err', marker = '.')
plt.legend(loc = 'upper right')
plt.grid()
plt.show()

Hidden layer

In [25]:

path = 'https://raw.githubusercontent.com/DA4BAM/dataset/master/boston.csv'
data = pd.read_csv(path)

target = 'medv'
x = data.drop(target, axis = 1)
y = data.loc[:, target]

x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=.2, random_state = 1)

# scaling
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)

In [29]:

# feature 개수
nfeatures = x_train.shape[1]

# 메모리 정리
clear_session()

# Sequential 모델
model3 = Sequential([Input(shape = (nfeatures,)),
                     Dense(4, activation = 'relu'),
                     Dense(1)])

# 모델 확인
model3.summary()

In [30]:

# compile
model3.compile(optimizer = Adam(learning_rate = 0.1), loss = 'mse')

# 학습
history = model3.fit(x_train, y_train, epochs = 100, validation_split = 0.2, verbose = 0).history

# 예측
y_pred3 = model3.predict(x_val)

# 검증
print(mean_absolute_error(y_val, y_pred3))
print(mean_absolute_percentage_error(y_val, y_pred3))
print(root_mean_squared_error(y_val, y_pred3))

4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
3.4524976487253234
0.19954878042560453
4.164142401809952

In [31]:

plt.figure(figsize = (10,5))
plt.plot(history['loss'], label = 'train_err', marker = '.')
plt.plot(history['val_loss'], label = 'val_err', marker = '.')
plt.legend(loc = 'upper right')
plt.grid()
plt.show()

In [31]: