Gradient Descent Optimization

  • SGD
  • Momentum
  • Nesterov
  • AdaGrad
  • RMSProp
  • Adam

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pylab as plt
import random


def numerical_gradient(f, x):
    h = 0.0001
    # Create a gradient array with the same shape as x.
    gradient = np.zeros_like(x)

    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)

        x[idx] = tmp_val - h
        fxh2 = f(x)

        # Central difference approximation of the partial derivative.
        gradient[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value

    return gradient


def gradient_function(x):
    return x[0] ** 2 / 10.0 + x[1] ** 2


def graph_function(x, y):
    return x ** 2 / 10.0 + y ** 2
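
As a quick sanity check of numerical_gradient, note that the analytic gradient of gradient_function is (x[0] / 5, 2 * x[1]). At a test point such as (3, 4) (a value chosen here only for illustration, not taken from the code above) the central difference should therefore return roughly (0.6, 8.0):

# Sanity check at an assumed test point (3, 4):
# analytic gradient is (3 / 5, 2 * 4) = (0.6, 8.0).
print(numerical_gradient(gradient_function, np.array([3.0, 4.0])))
# -> approximately [0.6 8.0]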


def gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    for i in range(step_num):
        dx = numerical_gradient(f, x)
        x -= learning_rate * dx
        list_pos.append(x.copy())
    return np.array(list_pos)


def Momentum_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    rho = 0.99
    vx = 0
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        vx = rho * vx - dx
        x += learning_rate * vx
        list_pos.append(x.copy())
    return np.array(list_pos)


def Nesterov_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    rho = 0.9
    vx = 0
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        old_v = vx
        # Update the velocity, then apply the Nesterov look-ahead correction.
        vx = rho * vx - learning_rate * dx
        x += -rho * old_v + (1 + rho) * vx
        list_pos.append(x.copy())
    return np.array(list_pos)


def AdaGrad_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    grad_squared = 0
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        # Accumulate squared gradients and scale each step by their square root.
        grad_squared += dx * dx
        x -= learning_rate * dx / (np.sqrt(grad_squared) + 1e-7)
        list_pos.append(x.copy())
    return np.array(list_pos)


def RMSProp_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    grad_squared = 0
    decay_rate = 0.99
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        # Exponentially decaying average of the squared gradients.
        grad_squared = decay_rate * grad_squared + (1 - decay_rate) * dx * dx
        x -= learning_rate * dx / (np.sqrt(grad_squared) + 1e-7)
        list_pos.append(x.copy())
    return np.array(list_pos)


def Adam_gradient_descent(f, init_pos, learning_rate, step_num=30):
    x = init_pos
    list_pos = []
    list_pos.append([x[0], x[1]])

    t = 1
    first_moment = 0
    second_moment = 0
    beta1 = 0.9
    beta2 = 0.999
    for i in range(step_num):
        dx = numerical_gradient(f, x)
        # Exponential moving averages of the gradient and squared gradient.
        first_moment = beta1 * first_moment + (1 - beta1) * dx
        second_moment = beta2 * second_moment + (1 - beta2) * dx * dx
        # Bias correction for the zero-initialized moments.
        first_unbias = first_moment / (1 - beta1 ** t)
        second_unbias = second_moment / (1 - beta2 ** t)
        x -= learning_rate * first_unbias / (np.sqrt(second_unbias) + 1e-7)
        list_pos.append(x.copy())
        t += 1
    return np.array(list_pos)
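
The optimizers above can be compared on the toy objective f(x, y) = x ** 2 / 10.0 + y ** 2, which has its minimum at the origin. The driver below is only a minimal sketch of how such a comparison might be plotted with graph_function as the contour background; the starting point (-7, 2), the learning rate 0.1, and the plot ranges are assumptions, not values taken from the code above.

# Assumed driver: run each optimizer from the same starting point and overlay
# the trajectories on a contour plot of graph_function.
optimizers = {
    'SGD': gradient_descent,
    'Momentum': Momentum_gradient_descent,
    'Nesterov': Nesterov_gradient_descent,
    'AdaGrad': AdaGrad_gradient_descent,
    'RMSProp': RMSProp_gradient_descent,
    'Adam': Adam_gradient_descent,
}

xs = np.arange(-10, 10, 0.1)
ys = np.arange(-5, 5, 0.1)
X, Y = np.meshgrid(xs, ys)
Z = graph_function(X, Y)

plt.contour(X, Y, Z, levels=20)
for name, optimizer in optimizers.items():
    # Each optimizer mutates init_pos in place, so pass a fresh array every time.
    path = optimizer(gradient_function, np.array([-7.0, 2.0]), learning_rate=0.1)
    plt.plot(path[:, 0], path[:, 1], 'o-', markersize=3, label=name)
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Optimizer trajectories on f(x, y) = x**2 / 10 + y**2')
plt.show()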
