Gradient Descent Optimization

  • SGD
  • Momentum
  • Nesterov
  • AdaGrad
  • RMSProp
  • Adam

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pylab as plt
import random


def numerical_gradient(f, x):
    h = 0.0001
    # Create a gradient array with the same shape as x.
    gradient = np.zeros_like(x)

    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)

        x[idx] = tmp_val - h
        fxh2 = f(x)

        # Central difference: (f(x + h) - f(x - h)) / (2h).
        gradient[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val

    return gradient
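
A quick sanity check (not in the original post): for f(x) = x[0]**2 / 10 + x[1]**2 the analytic gradient is (x[0] / 5, 2 * x[1]), so at (3, 4) the result should be about (0.6, 8.0).

# Illustrative check against the analytic gradient.
x = np.array([3.0, 4.0])
print(numerical_gradient(lambda v: v[0] ** 2 / 10.0 + v[1] ** 2, x))  # ≈ [0.6, 8.0]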


def gradient_function(x):
    return x[0] ** 2 / 10.0 + x[1] ** 2


def graph_function(x, y):
    return x ** 2 / 10.0 + y ** 2


def gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    for i in range(step_num):
        dx = numerical_gradient(f, x)
        x -= learning_rate * dx
        list_pos.append(x.copy())
    return np.array(list_pos)


def Momentum_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    rho = 0.99
    vx = 0
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        vx = rho * vx - dx
        x += learning_rate * vx
        list_pos.append(x.copy())
    return np.array(list_pos)


def Nesterov_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    rho = 0.9
    vx = 0
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        old_v = vx
        # Update the velocity and apply the Nesterov correction term.
        vx = rho * vx - learning_rate * dx
        x += -rho * old_v + (1 + rho) * vx
        list_pos.append(x.copy())
    return np.array(list_pos)


def AdaGrad_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    grad_squared = 0
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        # Accumulate squared gradients and scale each step element-wise.
        grad_squared += dx * dx
        x -= learning_rate * dx / (np.sqrt(grad_squared) + 1e-7)
        list_pos.append(x.copy())
    return np.array(list_pos)


def RMSProp_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    grad_squared = 0
    decay_rate = 0.99
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        # Exponential moving average of the squared gradients.
        grad_squared = decay_rate * grad_squared + (1 - decay_rate) * dx * dx
        x -= learning_rate * dx / (np.sqrt(grad_squared) + 1e-7)
        list_pos.append(x.copy())
    return np.array(list_pos)


def Adam_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    t = 1
    first_moment = 0
    second_moment = 0
    beta1 = 0.9
    beta2 = 0.999
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        # Moving averages of the gradient and the squared gradient.
        first_moment = beta1 * first_moment + (1 - beta1) * dx
        second_moment = beta2 * second_moment + (1 - beta2) * dx * dx
        # Bias correction for the early steps.
        first_unbias = first_moment / (1 - beta1 ** t)
        second_unbias = second_moment / (1 - beta2 ** t)
        x -= learning_rate * first_unbias / (np.sqrt(second_unbias) + 1e-7)
        list_pos.append(x.copy())
        t += 1
    return np.array(list_pos)
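
A minimal usage sketch (not part of the original post; the starting point and learning rate are arbitrary choices for illustration): run each optimizer on f(x, y) = x**2 / 10 + y**2 from the same starting point and draw its trajectory over the contour lines.

init_pos = np.array([-7.0, 2.0])
optimizers = {
    "SGD": gradient_descent,
    "Momentum": Momentum_gradient_descent,
    "Nesterov": Nesterov_gradient_descent,
    "AdaGrad": AdaGrad_gradient_descent,
    "RMSProp": RMSProp_gradient_descent,
    "Adam": Adam_gradient_descent,
}

# Contour grid for the objective surface.
xs = np.arange(-10, 10, 0.1)
ys = np.arange(-5, 5, 0.1)
X, Y = np.meshgrid(xs, ys)
Z = graph_function(X, Y)

for name, optimizer in optimizers.items():
    # Copy the starting point because each optimizer updates x in place.
    path = optimizer(gradient_function, init_pos.copy(), learning_rate=0.1)
    plt.contour(X, Y, Z, 20, cmap="gray")
    plt.plot(path[:, 0], path[:, 1], "o-", label=name)
    plt.title(name)
    plt.legend()
    plt.show()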

  

        




Loss Function

  • Measures how "bad" the neural network's performance is.
  • Commonly called the loss function or the cost function.

Mean Squared Error (MSE)

  • E = 0.5 * Σ_k (y_k - t_k)²
  • y = output value of the neural network.
  • t = target (ground-truth) label.
  • k = number of dimensions of the data.

def mean_squared_error(y, t):
    return 0.5 * np.sum((y - t) ** 2)
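
For example (illustrative numbers, assuming a one-hot target for class 2 and a softmax-like output that assigns 0.6 to that class):

t = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
y = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
print(mean_squared_error(y, t))  # 0.0975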


Cross Entropy Error (CEE)

  • E = -Σ_k t_k * log(y_k)
  • log = natural logarithm (base e).
  • y = output value of the neural network.
  • t = target (ground-truth) label.

def cross_entropy_error(y, t):
    delta = 1e-7  # avoids log(0) when an output value is exactly zero
    return -np.sum(t * np.log(y + delta))
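
With the same illustrative numbers, only the output for the correct class contributes because t is one-hot:

t = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
y = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
print(cross_entropy_error(y, t))  # ≈ 0.51 (= -log(0.6))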



CNN MNIST Example Code


import tensorflow as tf
import random
from tensorflow.examples.tutorials.mnist import input_data

tf.set_random_seed(777)
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
r = random.randint(0, mnist.test.num_examples - 1)

class Model:
    def __init__(self, sess, name):
        self.sess = sess
        self.name = name
        self.Layers()

    # build model
    def Layers(self):
        with tf.variable_scope(self.name):
            # set placeholder variables
            self.X = tf.placeholder(tf.float32, [None, 784])
            X_img = tf.reshape(self.X, [-1, 28, 28, 1])
            self.Y = tf.placeholder(tf.float32, [None, 10])

            # set Layers
            conv1 = tf.layers.conv2d(inputs=X_img, filters=6, kernel_size=[3, 3],
                                     padding="SAME", activation=tf.nn.relu)
            pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], padding="SAME", strides=2)
            dropout1 = tf.layers.dropout(inputs=pool1, rate=0.3)

            conv2 = tf.layers.conv2d(inputs=dropout1, filters=12, kernel_size=[3, 3],
                                     padding="SAME", activation=tf.nn.relu)
            pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], padding="SAME", strides=2)
            dropout2 = tf.layers.dropout(inputs=pool2, rate=0.3)

            conv3 = tf.layers.conv2d(inputs=dropout2, filters=24, kernel_size=[3, 3],
                                     padding="same", activation=tf.nn.relu)
            pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], padding="same", strides=2)
            dropout3 = tf.layers.dropout(inputs=pool3, rate=0.3)

            # Spatial size goes 28 -> 14 -> 7 -> 4 after three stride-2 poolings with SAME padding.
            flat = tf.reshape(dropout3, [-1, 24 * 4 * 4])
            dense4 = tf.layers.dense(inputs=flat, units=100, activation=tf.nn.relu)
            dropout4 = tf.layers.dropout(inputs=dense4, rate=0.5)

            self.logits = tf.layers.dense(inputs=dropout4, units=10)

        # loss & optimizer
        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)

        correct_prediction = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def get_predict(self, x_test, y_test):
        print("Test Label: ", self.sess.run(tf.argmax(y_test, 1)))
        print("Prediction Label: ", self.sess.run(tf.argmax(self.logits, 1), feed_dict={self.X: x_test}))
        return

    def get_accuracy(self, x_test, y_test):
        print('Test Data Set Accuracy:', self.sess.run(self.accuracy, feed_dict={self.X: x_test, self.Y: y_test}))
        return

    def train(self, x_data, y_data):
        return self.sess.run([self.cost, self.optimizer], feed_dict={self.X: x_data, self.Y: y_data})

# Main

# set learning_rate , epochs, batch_size
learning_rate = 0.001
training_epochs = 5
batch_size = 100

sess = tf.Session()
model = Model(sess, "model")
sess.run(tf.global_variables_initializer())

print("Learning start.")
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        c, _ = model.train(batch_xs, batch_ys)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
print('Learning Finish!')

# Evaluation
model.get_accuracy(mnist.test.images, mnist.test.labels)
model.get_predict(mnist.test.images[r:r + 1],mnist.test.labels[r:r + 1])
sess.close()




CHAIN OF RESPONSIBILITY PATTERN

  • Removes the coupling between the object that sends a message and the objects that receive and handle it.
  • Passes the handling of a single request along a chain of several objects.
  • An unhandled request is forwarded from the child (derived handler) up to its parent (successor).



#include <iostream>
using namespace std;

class Handler {
public:
    Handler(Handler* s) : _successor(s) {}
    // Default behavior: forward the request to the successor, or report it unhandled.
    virtual void HandleRequest(int i) {
        if (_successor != NULL)
            _successor->HandleRequest(i);
        else
            cout << "Handler" << endl;
    }
private:
    Handler* _successor;
};

class ConcreteHandler1 : public Handler {
public:
    ConcreteHandler1(Handler* ptrs) : Handler(ptrs) {}
    void HandleRequest(int i) override {
        if (i == 1)
            cout << "Handler 1" << endl;
        else
            Handler::HandleRequest(i);
    }
};

class ConcreteHandler2 : public Handler {
public:
    ConcreteHandler2(Handler* ptrs) : Handler(ptrs) {}
    void HandleRequest(int i) override {
        if (i == 2)
            cout << "Handler 2" << endl;
        else
            Handler::HandleRequest(i);
    }
};

int main() {
    Handler* prtHandler = new ConcreteHandler1(new ConcreteHandler2(NULL));
    prtHandler->HandleRequest(1);
    prtHandler->HandleRequest(2);
    prtHandler->HandleRequest(3);
    return 0;
}

ABSTRACT FACTORY PATTERN

  • Used to create families of related or independent objects.



#include <iostream>
using namespace std;

class AbstractProductA {
public:
    virtual void print() = 0;
};

class AbstractProductB {
public:
    virtual void print() = 0;
};

class ConcreteProductA1 : public AbstractProductA {
private:
    void print() { cout << "ConcreteProduct A1" << endl; }
};

class ConcreteProductA2 : public AbstractProductA {
private:
    void print() { cout << "ConcreteProduct A2" << endl; }
};

class ConcreteProductB1 : public AbstractProductB {
private:
    void print() { cout << "ConcreteProduct B1" << endl; }
};

class ConcreteProductB2 : public AbstractProductB {
private:
    void print() { cout << "ConcreteProduct B2" << endl; }
};

class AbstractFactory {
public:
    virtual AbstractProductA* createProductA() = 0;
    virtual AbstractProductB* createProductB() = 0;
};

class ConcreteFactory1 : public AbstractFactory {
public:
    AbstractProductA* createProductA() { return new ConcreteProductA1; }
    AbstractProductB* createProductB() { return new ConcreteProductB1; }
};

class ConcreteFactory2 : public AbstractFactory {
public:
    AbstractProductA* createProductA() { return new ConcreteProductA2; }
    AbstractProductB* createProductB() { return new ConcreteProductB2; }
};

int main() {
    ConcreteFactory1 factory1;
    ConcreteFactory2 factory2;

    AbstractProductA* A1 = factory1.createProductA();
    A1->print();
    AbstractProductB* B1 = factory1.createProductB();
    B1->print();

    AbstractProductA* A2 = factory2.createProductA();
    A2->print();
    AbstractProductB* B2 = factory2.createProductB();
    B2->print();

    delete A1;
    delete B1;
    delete A2;
    delete B2;
    return 0;
}


Reducing the number of ConcreteFactory classes with templates:

template <typename T>
class AbstractFactory {
public:
    virtual T* createProduct() = 0;
};

template <typename T1, typename T2>
class ConcreteFactory : public AbstractFactory<T1> {
public:
    T1* createProduct() { return new T2; }
};

int main() {
    ConcreteFactory<AbstractProductA, ConcreteProductA1> factory1;
    ConcreteFactory<AbstractProductB, ConcreteProductB1> factory2;
    ConcreteFactory<AbstractProductA, ConcreteProductA2> factory3;
    ConcreteFactory<AbstractProductB, ConcreteProductB2> factory4;

    AbstractProductA* a = factory1.createProduct();
    AbstractProductB* b = factory2.createProduct();
    AbstractProductA* c = factory3.createProduct();
    AbstractProductB* d = factory4.createProduct();

    a->print();
    b->print();
    c->print();
    d->print();
}

