241101
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Flatten, Input
from keras.backend import clear_session
from keras.optimizers import Adam
from keras.datasets import mnist, fashion_mnist
1. Multiclass Classification (MNIST)
In [2]:
# Load the MNIST data
(x_train, y_train), (x_val, y_val) = mnist.load_data()
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step
In [3]:
x_train.shape, y_train.shape
Out[3]:
((60000, 28, 28), (60000,))
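Each sample is a 28×28 grayscale image. Before flattening, it can help to look at one example; a minimal sketch (not executed in the original run):
In [ ]:
# Display the first training image with its label
plt.imshow(x_train[0], cmap='gray')
plt.title(f'label: {y_train[0]}')
plt.show()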
In [5]:
# Flatten each 28x28 image into a 784-dimensional vector
x_train = x_train.reshape(x_train.shape[0], -1)
x_val = x_val.reshape(x_val.shape[0], -1)
x_train.shape, x_val.shape
Out[5]:
((60000, 784), (10000, 784))
In [6]:
# Scale pixel values from [0, 255] to [0, 1]
x_train = x_train / 255.
x_val = x_val / 255.
In [9]:
# Build the model
nfeatures = x_train.shape[1]
clear_session()
model = Sequential([Input(shape=(nfeatures,)),
                    Dense(128, activation='relu'),
                    Dense(64, activation='relu'),
                    Dense(32, activation='relu'),
                    Dense(10, activation='softmax')])

# Compile and train
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy')
hist = model.fit(x_train, y_train, epochs=50, validation_split=.2, verbose=0).history
In [13]:
# Plot the learning curves
plt.plot(hist['loss'], label='train_loss', marker='.')
plt.plot(hist['val_loss'], label='val_loss', marker='.')
plt.legend()
plt.grid()
plt.show()

In [14]:
# Overfitting: training loss keeps decreasing while validation loss climbs
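To quantify the model despite the overfitting, a minimal sketch (not run in the original notebook) that scores it on the held-out validation set, using the accuracy_score imported above:
In [ ]:
# Predict class probabilities, take the argmax as the predicted digit
pred = model.predict(x_val)
pred_class = np.argmax(pred, axis=1)
print(accuracy_score(y_val, pred_class))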
2. Early Stopping
In [20]:
from keras.callbacks import EarlyStopping
In [15]:
path = "https://raw.githubusercontent.com/DA4BAM/dataset/master/overfit_sample.csv"
data = pd.read_csv(path)
data.tail()
Out[15]:
|     | target | 0      | 1      | 2      | 3      | 4      | 5      | 6      | 7      | 8      | ... | 290    | 291    | 292    | 293    | 294    | 295    | 296    | 297    | 298    | 299    |
| 245 | 1 | -0.068 | -0.184 | -1.153 | 0.610 | 0.414 | 1.557 | -0.234 | 0.950 | 0.896 | ... | 1.492 | 1.430 | -0.333 | -0.200 | -1.073 | 0.797 | 1.980 | 1.191 | 1.032 | -0.402 |
| 246 | 0 | -0.234 | -1.373 | -2.050 | -0.408 | -0.255 | 0.784 | 0.986 | -0.891 | -0.268 | ... | -0.996 | 0.678 | 1.395 | 0.714 | 0.215 | -0.537 | -1.267 | -1.021 | 0.747 | 0.128 |
| 247 | 0 | -2.327 | -1.834 | -0.762 | 0.660 | -0.858 | -2.764 | -0.539 | -0.065 | 0.549 | ... | -1.237 | -0.620 | 0.670 | -2.010 | 0.438 | 1.972 | -0.379 | 0.676 | -1.220 | -0.855 |
| 248 | 1 | -0.451 | -0.204 | -0.762 | 0.261 | 0.022 | -1.487 | -1.122 | 0.141 | 0.369 | ... | 0.729 | 0.411 | 2.366 | -0.021 | 0.160 | 0.045 | 0.208 | -2.117 | -0.546 | -0.093 |
| 249 | 0 | 0.725 | 1.064 | 1.333 | -2.863 | 0.203 | 1.898 | 0.434 | 1.207 | -0.015 | ... | -1.028 | 1.081 | 0.607 | 0.550 | -2.621 | -0.143 | -0.544 | -1.690 | -0.198 | 0.643 |

5 rows × 301 columns
In [16]:
# Split data: x, y
target = 'target'
x = data.drop(columns=target)
y = data[target]

# Split data: train, validation
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.3, random_state=1)
In [17]:
# Scale features to [0, 1] with MinMaxScaler (fit on train only)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)
In [18]:
# Build the model
nfeatures = x_train.shape[1]
clear_session()
model = Sequential([Input(shape=(nfeatures,)),
                    Dense(128, activation='relu'),
                    Dense(64, activation='relu'),
                    Dense(32, activation='relu'),
                    Dense(1, activation='sigmoid')])

# Compile and train
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy')
hist = model.fit(x_train, y_train, epochs=50, verbose=0, validation_split=0.2).history
In [19]:
# Plot the learning curves
plt.plot(hist['loss'], label='train_loss', marker='.')
plt.plot(hist['val_loss'], label='val_loss', marker='.')
plt.legend()
plt.grid()
plt.show()

In [21]:
# Configure early stopping: stop when val_loss fails to improve
# by at least min_delta for `patience` consecutive epochs
min_de = 0.001
pat = 5
es = EarlyStopping(monitor='val_loss', min_delta=min_de, patience=pat)

# Train (note: this continues training the already-fitted model above)
hist = model.fit(x_train, y_train, epochs=50, validation_split=0.2,
                 callbacks=[es]).history

# Plot the learning curves
plt.plot(hist['loss'], label='train_loss', marker='.')
plt.plot(hist['val_loss'], label='val_loss', marker='.')
plt.legend()
plt.grid()
plt.show()
Epoch 1/50
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 43ms/step - loss: 0.0286 - val_loss: 1.2123
Epoch 2/50
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0323 - val_loss: 1.0409
Epoch 3/50
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.0218 - val_loss: 1.1792
Epoch 4/50
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.0137 - val_loss: 1.0717
Epoch 5/50
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.0115 - val_loss: 1.1745
Epoch 6/50
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.0112 - val_loss: 1.1232
Epoch 7/50
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0095 - val_loss: 1.1672
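Training halted at epoch 7: val_loss last improved at epoch 2 (1.0409), and the next five epochs all failed to beat it by min_delta, exhausting the patience budget. A hedged variant (assuming you want the weights from the best epoch rather than the last one, and a fresh model instead of continuing the previous fit) adds restore_best_weights=True:
In [ ]:
# Stop on stalled val_loss and roll back to the best-epoch weights
es = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=5,
                   restore_best_weights=True)
clear_session()
model = Sequential([Input(shape=(nfeatures,)),
                    Dense(128, activation='relu'),
                    Dense(64, activation='relu'),
                    Dense(32, activation='relu'),
                    Dense(1, activation='sigmoid')])
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy')
hist = model.fit(x_train, y_train, epochs=50, validation_split=0.2,
                 callbacks=[es], verbose=0).history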

3. Dropout
In [22]:
from keras.layers import Dropout
In [23]:
# Baseline model (no dropout)
nfeatures = x_train.shape[1]
clear_session()
model1 = Sequential([Input(shape=(nfeatures,)),
                     Dense(128, activation='relu'),
                     Dense(64, activation='relu'),
                     Dense(32, activation='relu'),
                     Dense(1, activation='sigmoid')])

# Compile and train
model1.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy')
hist1 = model1.fit(x_train, y_train, epochs=50, validation_split=0.2, verbose=0).history

# Plot the learning curves
plt.plot(hist1['loss'], label='train_loss', marker='.')
plt.plot(hist1['val_loss'], label='val_loss', marker='.')
plt.legend()
plt.grid()
plt.show()

In [24]:
# Dropout model: randomly zero 40% of each hidden layer's outputs during training
clear_session()
model2 = Sequential([Input(shape=(nfeatures,)),
                     Dense(128, activation='relu'),
                     Dropout(0.4),
                     Dense(64, activation='relu'),
                     Dropout(0.4),
                     Dense(32, activation='relu'),
                     Dropout(0.4),
                     Dense(1, activation='sigmoid')])

# Compile and train
model2.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy')
hist2 = model2.fit(x_train, y_train, epochs=50, validation_split=0.2, verbose=0).history

# Plot the learning curves
plt.plot(hist2['loss'], label='train_loss', marker='.')
plt.plot(hist2['val_loss'], label='val_loss', marker='.')
plt.legend()
plt.grid()
plt.show()
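To see dropout's regularizing effect directly, a minimal sketch (assuming hist1 and hist2 from the two runs above are still in memory) that overlays the two validation-loss curves:
In [ ]:
# Compare validation loss with and without dropout
plt.plot(hist1['val_loss'], label='val_loss (no dropout)', marker='.')
plt.plot(hist2['val_loss'], label='val_loss (dropout 0.4)', marker='.')
plt.legend()
plt.grid()
plt.show()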