We've been working in class on optimization methods, and were asked to implement a quasi-Newtonian algorithm to find the minimum of the function: $f(x, y) = x^2 + y^2$ using the David-Fletcher-Powell method to approximate the hessian of $f$ and Armijo's rule to find the optimal value of alpha at every step. The following is my python code for the problem:
from random import uniform
import numpy as np
import scipy.optimize as spo
from time import time
from numdifftools import Gradient
--------------------------- Objective function: f(x, y) = (x + 1)^2 + y^2 -------------------------------
number of variables
n_variables = 2
def f(X):
return (X[0] + 1)2 + X[1]2
--------------------------- Armijou -------------------------------
def armijo(func, X, d):
# gradient object
gradient = Gradient(func)
# argmin function and its derivative
phi = lambda a: func(X - a * d)
phi_prime_0 = d.T @ gradient(X)
# parameters
epsilon = .6
eta = 2
# random starting alpha
alpha = uniform(0, 1)
# if condition 3 is verfied
if phi(alpha) <= phi(0) + (epsilon * phi_prime_0 * alpha):
while phi(eta * alpha) <= phi(0) + (epsilon * phi_prime_0 * eta * alpha):
alpha *= eta
else:
while phi(alpha) > (epsilon * alpha * phi_prime_0) + phi(0):
alpha /= eta
return alpha
--------------------------- Quasi-Newton Method -------------------
def quasi_newton(func, start, finish):
# method parameters
epsilon = 1e-12
max_iter = 100
# gradient object
gradient = Gradient(func)
# initial values
X = [uniform(start, finish) for _ in range(n_variables)]
g = gradient(X).T
H = np.identity(n_variables)
# iteration counter
iter = 0
# main loop
while np.linalg.norm(g) > epsilon and iter < max_iter:
# compute direction
d = -H @ g
# find optimal value for alpha
alpha = armijo(func, X, d)
# save previous value of X
old_X = X
# update values
X = X - alpha * d
g = gradient(X).T
# use DFP to approximate hessian
y = gradient(X) - gradient(old_X)
A = alpha * (d @ d.T) / (d.T @ y)
B = -((H @ y) @ (H @ y).T) / (y.T @ H @ y)
H = H + A + B
iter += 1
if iter == max_iter: print("Max iterations reached!")
return X
--------------------------- Program -------------------------------
def main():
# bounds
low_bound = -100
high_bound = 100
# testing method
print("--------------- Quasi-Newton's Method ---------------------")
quasi_X = np.zeros((n_variables, 1))
start = time()
quasi_X = quasi_newton(f, low_bound, high_bound)
finish = time()
quasi_time = finish - start
print("Minimum:", f(quasi_X))
print("Found at position:")
print(quasi_X)
print("Time: ", quasi_time, " seconds")
--------------------------- Execution -------------------------------
if name == "main":
main()
I know for a fact that the implementation of the quasi-Newtonian method is working, as when I use scipy to find the optimal value of alpha, the algorithm does converge. The problem arises when I try finding alpha using my implementation of Armijo's rule, as the alpha found is way too small which results in division by zero.