Gradient Descent Optimization

  • SGD
  • Momentum
  • Nesterov
  • AdaGrad
  • RMSProp
  • Adam

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pylab as plt
import random


def numerical_gradient(f, x):
    h = 0.0001
    # Create a gradient array with the same shape as x.
    gradient = np.zeros_like(x)

    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)

        x[idx] = tmp_val - h
        fxh2 = f(x)

        # Central difference approximation of the partial derivative.
        gradient[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value

    return gradient


def gradient_function(x):
    return x[0] ** 2 / 10.0 + x[1] ** 2


def graph_function(x, y):
    return x ** 2 / 10.0 + y ** 2
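
As a quick sanity check of numerical_gradient, note that the analytic gradient of gradient_function is (x[0] / 5, 2 * x[1]). At a test point such as (3, 4) (a value chosen here only for illustration, not taken from the code above) the central difference should therefore return roughly (0.6, 8.0):

# Sanity check at an assumed test point (3, 4):
# analytic gradient is (3 / 5, 2 * 4) = (0.6, 8.0).
print(numerical_gradient(gradient_function, np.array([3.0, 4.0])))
# -> approximately [0.6 8.0]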


def gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    for i in range(step_num):
        dx = numerical_gradient(f, x)
        x -= learning_rate * dx
        list_pos.append(x.copy())
    return np.array(list_pos)


def Momentum_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    rho = 0.99
    vx = 0
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        vx = rho * vx - dx
        x += learning_rate * vx
        list_pos.append(x.copy())
    return np.array(list_pos)


def Nesterov_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    rho = 0.9
    vx = 0
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        old_v = vx
        # Update the velocity, then apply the Nesterov look-ahead correction.
        vx = rho * vx - learning_rate * dx
        x += -rho * old_v + (1 + rho) * vx
        list_pos.append(x.copy())
    return np.array(list_pos)


def AdaGrad_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    grad_squared = 0
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        # Accumulate squared gradients and scale each step by their square root.
        grad_squared += dx * dx
        x -= learning_rate * dx / (np.sqrt(grad_squared) + 1e-7)
        list_pos.append(x.copy())
    return np.array(list_pos)


def RMSProp_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    grad_squared = 0
    decay_rate = 0.99
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        # Exponentially decaying average of the squared gradients.
        grad_squared = decay_rate * grad_squared + (1 - decay_rate) * dx * dx
        x -= learning_rate * dx / (np.sqrt(grad_squared) + 1e-7)
        list_pos.append(x.copy())
    return np.array(list_pos)


def Adam_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    t = 1
    first_moment = 0
    second_moment = 0
    beta1 = 0.9
    beta2 = 0.999
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        # Exponential moving averages of the gradient and squared gradient.
        first_moment = beta1 * first_moment + (1 - beta1) * dx
        second_moment = beta2 * second_moment + (1 - beta2) * dx * dx
        # Bias correction for the zero-initialized moments.
        first_unbias = first_moment / (1 - beta1 ** t)
        second_unbias = second_moment / (1 - beta2 ** t)
        x -= learning_rate * first_unbias / (np.sqrt(second_unbias) + 1e-7)
        list_pos.append(x.copy())
        t += 1
    return np.array(list_pos)
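
The optimizers above can be compared on the toy objective f(x, y) = x ** 2 / 10.0 + y ** 2, which has its minimum at the origin. The driver below is only a minimal sketch of how such a comparison might be plotted with graph_function as the contour background; the starting point (-7, 2), the learning rate 0.1, and the plot ranges are assumptions, not values taken from the code above.

# Assumed driver: run each optimizer from the same starting point and overlay
# the trajectories on a contour plot of graph_function.
optimizers = {
    'SGD': gradient_descent,
    'Momentum': Momentum_gradient_descent,
    'Nesterov': Nesterov_gradient_descent,
    'AdaGrad': AdaGrad_gradient_descent,
    'RMSProp': RMSProp_gradient_descent,
    'Adam': Adam_gradient_descent,
}

xs = np.arange(-10, 10, 0.1)
ys = np.arange(-5, 5, 0.1)
X, Y = np.meshgrid(xs, ys)
Z = graph_function(X, Y)

plt.contour(X, Y, Z, levels=20)
for name, optimizer in optimizers.items():
    # Each optimizer mutates init_pos in place, so pass a fresh array every time.
    path = optimizer(gradient_function, np.array([-7.0, 2.0]), learning_rate=0.1)
    plt.plot(path[:, 0], path[:, 1], 'o-', markersize=3, label=name)
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Optimizer trajectories on f(x, y) = x**2 / 10 + y**2')
plt.show()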
