(function (global, factory) {
    typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
    typeof define === 'function' && define.amd ? define(['exports'], factory) :
    (factory((global.fmin = global.fmin || {})));
}(this, function (exports) { 'use strict';

    /** finds the zeros of a function, given two starting points (which must
     * have opposite signs) */
    function bisect(f, a, b, parameters) {
        parameters = parameters || {};
        var maxIterations = parameters.maxIterations || 100,
            tolerance = parameters.tolerance || 1e-10,
            fA = f(a),
            fB = f(b),
            delta = b - a;

        if (fA * fB > 0) {
            throw "Initial bisect points must have opposite signs";
        }

        if (fA === 0) return a;
        if (fB === 0) return b;

        for (var i = 0; i < maxIterations; ++i) {
            delta /= 2;
            var mid = a + delta,
                fMid = f(mid);

            if (fMid * fA >= 0) {
                a = mid;
            }

            if ((Math.abs(delta) < tolerance) || (fMid === 0)) {
                return mid;
            }
        }
        return a + delta;
    }
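
    // Usage sketch (hedged): assuming this build is loaded as the global `fmin`,
    // bisect finds a root of a scalar function whose values at the two starting
    // points have opposite signs. Values shown are approximate:
    //
    //     var root = fmin.bisect(function (x) { return x * x - 2; }, 0, 2);
    //     // root is approximately Math.sqrt(2), i.e. ~1.41421356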

    // need some basic operations on vectors, rather than adding a dependency,
    // just define here
    function zeros(x) { var r = new Array(x); for (var i = 0; i < x; ++i) { r[i] = 0; } return r; }
    function zerosM(x,y) { return zeros(x).map(function() { return zeros(y); }); }

    function dot(a, b) {
        var ret = 0;
        for (var i = 0; i < a.length; ++i) {
            ret += a[i] * b[i];
        }
        return ret;
    }

    function norm2(a) {
        return Math.sqrt(dot(a, a));
    }

    function scale(ret, value, c) {
        for (var i = 0; i < value.length; ++i) {
            ret[i] = value[i] * c;
        }
    }

    function weightedSum(ret, w1, v1, w2, v2) {
        for (var j = 0; j < ret.length; ++j) {
            ret[j] = w1 * v1[j] + w2 * v2[j];
        }
    }
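
    // Note on the helpers above: `scale` and `weightedSum` write their result into
    // the first argument (`ret`) rather than allocating a new array, which is why
    // the solvers below pre-allocate buffers such as `centroid` and `reflected`.
    // Illustrative sketch (uses only the functions defined above):
    //
    //     var out = [0, 0];
    //     weightedSum(out, 2, [1, 2], -1, [3, 1]);   // out is now [-1, 3]
    //     scale(out, out, 0.5);                      // out is now [-0.5, 1.5]
    //     // dot([1, 2], [3, 1]) === 5, norm2([3, 4]) === 5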

    /** minimizes a function using the downhill simplex method */
    function nelderMead(f, x0, parameters) {
        parameters = parameters || {};

        var maxIterations = parameters.maxIterations || x0.length * 200,
            nonZeroDelta = parameters.nonZeroDelta || 1.05,
            zeroDelta = parameters.zeroDelta || 0.001,
            minErrorDelta = parameters.minErrorDelta || 1e-6,
            minTolerance = parameters.minTolerance || 1e-5,
            rho = (parameters.rho !== undefined) ? parameters.rho : 1,
            chi = (parameters.chi !== undefined) ? parameters.chi : 2,
            psi = (parameters.psi !== undefined) ? parameters.psi : -0.5,
            sigma = (parameters.sigma !== undefined) ? parameters.sigma : 0.5,
            maxDiff;

        // initialize simplex.
        var N = x0.length,
            simplex = new Array(N + 1);
        simplex[0] = x0;
        simplex[0].fx = f(x0);
        simplex[0].id = 0;
        for (var i = 0; i < N; ++i) {
            var point = x0.slice();
            point[i] = point[i] ? point[i] * nonZeroDelta : zeroDelta;
            simplex[i+1] = point;
            simplex[i+1].fx = f(point);
            simplex[i+1].id = i+1;
        }

        function updateSimplex(value) {
            for (var i = 0; i < value.length; i++) {
                simplex[N][i] = value[i];
            }
            simplex[N].fx = value.fx;
        }

        var sortOrder = function(a, b) { return a.fx - b.fx; };

        var centroid = x0.slice(),
            reflected = x0.slice(),
            contracted = x0.slice(),
            expanded = x0.slice();

        for (var iteration = 0; iteration < maxIterations; ++iteration) {
            simplex.sort(sortOrder);

            if (parameters.history) {
                // copy the simplex (since later iterations will mutate) and
                // sort it to have a consistent order between iterations
                var sortedSimplex = simplex.map(function (x) {
                    var state = x.slice();
                    state.fx = x.fx;
                    state.id = x.id;
                    return state;
                });
                sortedSimplex.sort(function(a,b) { return a.id - b.id; });

                parameters.history.push({x: simplex[0].slice(),
                                         fx: simplex[0].fx,
                                         simplex: sortedSimplex});
            }

            maxDiff = 0;
            for (i = 0; i < N; ++i) {
                maxDiff = Math.max(maxDiff, Math.abs(simplex[0][i] - simplex[1][i]));
            }

            if ((Math.abs(simplex[0].fx - simplex[N].fx) < minErrorDelta) &&
                (maxDiff < minTolerance)) {
                break;
            }

            // compute the centroid of all but the worst point in the simplex
            for (i = 0; i < N; ++i) {
                centroid[i] = 0;
                for (var j = 0; j < N; ++j) {
                    centroid[i] += simplex[j][i];
                }
                centroid[i] /= N;
            }

            // reflect the worst point past the centroid and compute loss at reflected
            // point
            var worst = simplex[N];
            weightedSum(reflected, 1+rho, centroid, -rho, worst);
            reflected.fx = f(reflected);

            // if the reflected point is the best seen, then possibly expand
            if (reflected.fx < simplex[0].fx) {
                weightedSum(expanded, 1+chi, centroid, -chi, worst);
                expanded.fx = f(expanded);
                if (expanded.fx < reflected.fx) {
                    updateSimplex(expanded);
                } else {
                    updateSimplex(reflected);
                }
            }

            // if the reflected point is worse than the second worst, we need to
            // contract
            else if (reflected.fx >= simplex[N-1].fx) {
                var shouldReduce = false;

                if (reflected.fx > worst.fx) {
                    // do an inside contraction
                    weightedSum(contracted, 1+psi, centroid, -psi, worst);
                    contracted.fx = f(contracted);
                    if (contracted.fx < worst.fx) {
                        updateSimplex(contracted);
                    } else {
                        shouldReduce = true;
                    }
                } else {
                    // do an outside contraction
                    weightedSum(contracted, 1-psi * rho, centroid, psi*rho, worst);
                    contracted.fx = f(contracted);
                    if (contracted.fx < reflected.fx) {
                        updateSimplex(contracted);
                    } else {
                        shouldReduce = true;
                    }
                }

                if (shouldReduce) {
                    // if we don't contract here, we're done
                    if (sigma >= 1) break;

                    // do a reduction
                    for (i = 1; i < simplex.length; ++i) {
                        weightedSum(simplex[i], 1 - sigma, simplex[0], sigma, simplex[i]);
                        simplex[i].fx = f(simplex[i]);
                    }
                }
            } else {
                updateSimplex(reflected);
            }
        }

        simplex.sort(sortOrder);
        return {fx : simplex[0].fx,
                x : simplex[0]};
    }
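
    // Usage sketch (hedged): assuming the global is `fmin`, nelderMead only needs
    // function values, no gradient. Minimizing the Rosenbrock-style function below
    // from the starting point [-1, 1] should end up near [1, 1] (approximately):
    //
    //     var loss = function (X) {
    //         var x = X[0], y = X[1];
    //         return Math.pow(1 - x, 2) + 100 * Math.pow(y - x * x, 2);
    //     };
    //     var solution = fmin.nelderMead(loss, [-1, 1]);
    //     // solution.x ~= [1, 1], solution.fx ~= 0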

    /// searches along line 'pk' for a point that satisfies the Wolfe conditions
    /// See 'Numerical Optimization' by Nocedal and Wright p59-60
    /// f : objective function
    /// pk : search direction
    /// current: object containing current gradient/loss
    /// next: output: contains next gradient/loss
    /// returns a: step size taken
    function wolfeLineSearch(f, pk, current, next, a, c1, c2) {
        var phi0 = current.fx, phiPrime0 = dot(current.fxprime, pk),
            phi = phi0, phi_old = phi0,
            phiPrime = phiPrime0,
            a0 = 0;

        a = a || 1;
        c1 = c1 || 1e-6;
        c2 = c2 || 0.1;

        function zoom(a_lo, a_high, phi_lo) {
            for (var iteration = 0; iteration < 16; ++iteration) {
                a = (a_lo + a_high)/2;
                weightedSum(next.x, 1.0, current.x, a, pk);
                phi = next.fx = f(next.x, next.fxprime);
                phiPrime = dot(next.fxprime, pk);

                if ((phi > (phi0 + c1 * a * phiPrime0)) ||
                    (phi >= phi_lo)) {
                    a_high = a;

                } else {
                    if (Math.abs(phiPrime) <= -c2 * phiPrime0) {
                        return a;
                    }

                    if (phiPrime * (a_high - a_lo) >= 0) {
                        a_high = a_lo;
                    }

                    a_lo = a;
                    phi_lo = phi;
                }
            }

            return 0;
        }

        for (var iteration = 0; iteration < 10; ++iteration) {
            weightedSum(next.x, 1.0, current.x, a, pk);
            phi = next.fx = f(next.x, next.fxprime);
            phiPrime = dot(next.fxprime, pk);
            if ((phi > (phi0 + c1 * a * phiPrime0)) ||
                (iteration && (phi >= phi_old))) {
                return zoom(a0, a, phi_old);
            }

            if (Math.abs(phiPrime) <= -c2 * phiPrime0) {
                return a;
            }

            if (phiPrime >= 0) {
                return zoom(a, a0, phi);
            }

            phi_old = phi;
            a0 = a;
            a *= 2;
        }

        return a;
    }
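
    // Convention note (sketch): the gradient-based routines below call the
    // objective as f(x, fxprime), expecting the gradient to be written in place
    // into the `fxprime` array while the function value is returned. For example,
    // for f(x, y) = x^2 + y^2 an objective could look like:
    //
    //     var quadratic = function (x, fxprime) {
    //         fxprime[0] = 2 * x[0];
    //         fxprime[1] = 2 * x[1];
    //         return x[0] * x[0] + x[1] * x[1];
    //     };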

    function conjugateGradient(f, initial, params) {
        // allocate all memory up front here, keep out of the loop for performance
        // reasons
        var current = {x: initial.slice(), fx: 0, fxprime: initial.slice()},
            next = {x: initial.slice(), fx: 0, fxprime: initial.slice()},
            yk = initial.slice(),
            pk, temp,
            a = 1,
            maxIterations;

        params = params || {};
        maxIterations = params.maxIterations || initial.length * 20;

        current.fx = f(current.x, current.fxprime);
        pk = current.fxprime.slice();
        scale(pk, current.fxprime, -1);

        for (var i = 0; i < maxIterations; ++i) {
            a = wolfeLineSearch(f, pk, current, next, a);

            // todo: history in wrong spot?
            if (params.history) {
                params.history.push({x: current.x.slice(),
                                     fx: current.fx,
                                     fxprime: current.fxprime.slice(),
                                     alpha: a});
            }

            if (!a) {
                // failed to find a point that satisfies the Wolfe conditions:
                // reset direction for next iteration
                scale(pk, current.fxprime, -1);

            } else {
                // update direction using Polak–Ribiere CG method
                weightedSum(yk, 1, next.fxprime, -1, current.fxprime);

                var delta_k = dot(current.fxprime, current.fxprime),
                    beta_k = Math.max(0, dot(yk, next.fxprime) / delta_k);

                weightedSum(pk, beta_k, pk, -1, next.fxprime);

                temp = current;
                current = next;
                next = temp;
            }

            if (norm2(current.fxprime) <= 1e-5) {
                break;
            }
        }

        if (params.history) {
            params.history.push({x: current.x.slice(),
                                 fx: current.fx,
                                 fxprime: current.fxprime.slice(),
                                 alpha: a});
        }

        return current;
    }
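
    // Usage sketch (hedged), reusing the f(x, fxprime) convention illustrated above:
    //
    //     var solution = fmin.conjugateGradient(quadratic, [10, -7]);
    //     // solution.x should be close to [0, 0] and solution.fx close to 0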

    function gradientDescent(f, initial, params) {
        params = params || {};
        var maxIterations = params.maxIterations || initial.length * 100,
            learnRate = params.learnRate || 0.001,
            current = {x: initial.slice(), fx: 0, fxprime: initial.slice()};

        for (var i = 0; i < maxIterations; ++i) {
            current.fx = f(current.x, current.fxprime);
            if (params.history) {
                params.history.push({x: current.x.slice(),
                                     fx: current.fx,
                                     fxprime: current.fxprime.slice()});
            }

            weightedSum(current.x, 1, current.x, -learnRate, current.fxprime);
            if (norm2(current.fxprime) <= 1e-5) {
                break;
            }
        }

        return current;
    }
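
    // Usage sketch (hedged): plain gradient descent with a fixed learnRate; the
    // default rate of 0.001 may need tuning for a given problem.
    //
    //     var solution = fmin.gradientDescent(quadratic, [10, -7], {learnRate: 0.1});
    //     // iterates x <- x - learnRate * gradient until the gradient norm is small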

    function gradientDescentLineSearch(f, initial, params) {
        params = params || {};
        var current = {x: initial.slice(), fx: 0, fxprime: initial.slice()},
            next = {x: initial.slice(), fx: 0, fxprime: initial.slice()},
            maxIterations = params.maxIterations || initial.length * 100,
            learnRate = params.learnRate || 1,
            pk = initial.slice(),
            c1 = params.c1 || 1e-3,
            c2 = params.c2 || 0.1,
            temp,
            functionCalls = [];

        if (params.history) {
            // wrap the function call to track linesearch samples
            var inner = f;
            f = function(x, fxprime) {
                functionCalls.push(x.slice());
                return inner(x, fxprime);
            };
        }

        current.fx = f(current.x, current.fxprime);
        for (var i = 0; i < maxIterations; ++i) {
            scale(pk, current.fxprime, -1);
            learnRate = wolfeLineSearch(f, pk, current, next, learnRate, c1, c2);

            if (params.history) {
                params.history.push({x: current.x.slice(),
                                     fx: current.fx,
                                     fxprime: current.fxprime.slice(),
                                     functionCalls: functionCalls,
                                     learnRate: learnRate,
                                     alpha: learnRate});
                functionCalls = [];
            }

            temp = current;
            current = next;
            next = temp;

            if ((learnRate === 0) || (norm2(current.fxprime) < 1e-5)) break;
        }

        return current;
    }
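
    // Usage sketch (hedged): same call shape as gradientDescent, but the step size
    // is chosen each iteration by the Wolfe line search above instead of being fixed.
    //
    //     var solution = fmin.gradientDescentLineSearch(quadratic, [10, -7]);
    //     // solution.x should again be close to [0, 0]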

    exports.bisect = bisect;
    exports.nelderMead = nelderMead;
    exports.conjugateGradient = conjugateGradient;
    exports.gradientDescent = gradientDescent;
    exports.gradientDescentLineSearch = gradientDescentLineSearch;
    exports.zeros = zeros;
    exports.zerosM = zerosM;
    exports.norm2 = norm2;
    exports.weightedSum = weightedSum;
    exports.scale = scale;

}));