Submit
Path:
~
/
/
proc
/
thread-self
/
root
/
opt
/
alt
/
python35
/
lib64
/
python3.5
/
site-packages
/
scipy
/
optimize
/
File Content:
linesearch.py
""" Functions --------- .. autosummary:: :toctree: generated/ line_search_armijo line_search_wolfe1 line_search_wolfe2 scalar_search_wolfe1 scalar_search_wolfe2 """ from __future__ import division, print_function, absolute_import from warnings import warn from scipy.optimize import minpack2 import numpy as np from scipy._lib.six import xrange __all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2', 'scalar_search_wolfe1', 'scalar_search_wolfe2', 'line_search_armijo'] class LineSearchWarning(RuntimeWarning): pass #------------------------------------------------------------------------------ # Minpack's Wolfe line and scalar searches #------------------------------------------------------------------------------ def line_search_wolfe1(f, fprime, xk, pk, gfk=None, old_fval=None, old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8, xtol=1e-14): """ As `scalar_search_wolfe1` but do a line search to direction `pk` Parameters ---------- f : callable Function `f(x)` fprime : callable Gradient of `f` xk : array_like Current point pk : array_like Search direction gfk : array_like, optional Gradient of `f` at point `xk` old_fval : float, optional Value of `f` at point `xk` old_old_fval : float, optional Value of `f` at point preceding `xk` The rest of the parameters are the same as for `scalar_search_wolfe1`. Returns ------- stp, f_count, g_count, fval, old_fval As in `line_search_wolfe1` gval : array Gradient of `f` at the final point """ if gfk is None: gfk = fprime(xk) if isinstance(fprime, tuple): eps = fprime[1] fprime = fprime[0] newargs = (f, eps) + args gradient = False else: newargs = args gradient = True gval = [gfk] gc = [0] fc = [0] def phi(s): fc[0] += 1 return f(xk + s*pk, *args) def derphi(s): gval[0] = fprime(xk + s*pk, *newargs) if gradient: gc[0] += 1 else: fc[0] += len(xk) + 1 return np.dot(gval[0], pk) derphi0 = np.dot(gfk, pk) stp, fval, old_fval = scalar_search_wolfe1( phi, derphi, old_fval, old_old_fval, derphi0, c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol) return stp, fc[0], gc[0], fval, old_fval, gval[0] def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None, c1=1e-4, c2=0.9, amax=50, amin=1e-8, xtol=1e-14): """ Scalar function search for alpha that satisfies strong Wolfe conditions alpha > 0 is assumed to be a descent direction. Parameters ---------- phi : callable phi(alpha) Function at point `alpha` derphi : callable dphi(alpha) Derivative `d phi(alpha)/ds`. Returns a scalar. phi0 : float, optional Value of `f` at 0 old_phi0 : float, optional Value of `f` at the previous point derphi0 : float, optional Value `derphi` at 0 c1, c2 : float, optional Wolfe parameters amax, amin : float, optional Maximum and minimum step size xtol : float, optional Relative tolerance for an acceptable step. Returns ------- alpha : float Step size, or None if no suitable step was found phi : float Value of `phi` at the new point `alpha` phi0 : float Value of `phi` at `alpha=0` Notes ----- Uses routine DCSRCH from MINPACK. """ if phi0 is None: phi0 = phi(0.) if derphi0 is None: derphi0 = derphi(0.) if old_phi0 is not None and derphi0 != 0: alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0) if alpha1 < 0: alpha1 = 1.0 else: alpha1 = 1.0 phi1 = phi0 derphi1 = derphi0 isave = np.zeros((2,), np.intc) dsave = np.zeros((13,), float) task = b'START' maxiter = 100 for i in xrange(maxiter): stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1, c1, c2, xtol, task, amin, amax, isave, dsave) if task[:2] == b'FG': alpha1 = stp phi1 = phi(stp) derphi1 = derphi(stp) else: break else: # maxiter reached, the line search did not converge stp = None if task[:5] == b'ERROR' or task[:4] == b'WARN': stp = None # failed return stp, phi1, phi0 line_search = line_search_wolfe1 #------------------------------------------------------------------------------ # Pure-Python Wolfe line and scalar searches #------------------------------------------------------------------------------ def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None, old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=50): """Find alpha that satisfies strong Wolfe conditions. Parameters ---------- f : callable f(x,*args) Objective function. myfprime : callable f'(x,*args) Objective function gradient. xk : ndarray Starting point. pk : ndarray Search direction. gfk : ndarray, optional Gradient value for x=xk (xk being the current parameter estimate). Will be recomputed if omitted. old_fval : float, optional Function value for x=xk. Will be recomputed if omitted. old_old_fval : float, optional Function value for the point preceding x=xk args : tuple, optional Additional arguments passed to objective function. c1 : float, optional Parameter for Armijo condition rule. c2 : float, optional Parameter for curvature condition rule. amax : float, optional Maximum step size Returns ------- alpha : float or None Alpha for which ``x_new = x0 + alpha * pk``, or None if the line search algorithm did not converge. fc : int Number of function evaluations made. gc : int Number of gradient evaluations made. new_fval : float or None New function value ``f(x_new)=f(x0+alpha*pk)``, or None if the line search algorithm did not converge. old_fval : float Old function value ``f(x0)``. new_slope : float or None The local slope along the search direction at the new value ``<myfprime(x_new), pk>``, or None if the line search algorithm did not converge. Notes ----- Uses the line search algorithm to enforce strong Wolfe conditions. See Wright and Nocedal, 'Numerical Optimization', 1999, pg. 59-60. For the zoom phase it uses an algorithm by [...]. """ fc = [0] gc = [0] gval = [None] def phi(alpha): fc[0] += 1 return f(xk + alpha * pk, *args) if isinstance(myfprime, tuple): def derphi(alpha): fc[0] += len(xk) + 1 eps = myfprime[1] fprime = myfprime[0] newargs = (f, eps) + args gval[0] = fprime(xk + alpha * pk, *newargs) # store for later use return np.dot(gval[0], pk) else: fprime = myfprime def derphi(alpha): gc[0] += 1 gval[0] = fprime(xk + alpha * pk, *args) # store for later use return np.dot(gval[0], pk) if gfk is None: gfk = fprime(xk, *args) derphi0 = np.dot(gfk, pk) alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2( phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax) if derphi_star is None: warn('The line search algorithm did not converge', LineSearchWarning) else: # derphi_star is a number (derphi) -- so use the most recently # calculated gradient used in computing it derphi = gfk*pk # this is the gradient at the next step no need to compute it # again in the outer loop. derphi_star = gval[0] return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star def scalar_search_wolfe2(phi, derphi=None, phi0=None, old_phi0=None, derphi0=None, c1=1e-4, c2=0.9, amax=50): """Find alpha that satisfies strong Wolfe conditions. alpha > 0 is assumed to be a descent direction. Parameters ---------- phi : callable f(x) Objective scalar function. derphi : callable f'(x), optional Objective function derivative (can be None) phi0 : float, optional Value of phi at s=0 old_phi0 : float, optional Value of phi at previous point derphi0 : float, optional Value of derphi at s=0 c1 : float, optional Parameter for Armijo condition rule. c2 : float, optional Parameter for curvature condition rule. amax : float, optional Maximum step size Returns ------- alpha_star : float or None Best alpha, or None if the line search algorithm did not converge. phi_star : float phi at alpha_star phi0 : float phi at 0 derphi_star : float or None derphi at alpha_star, or None if the line search algorithm did not converge. Notes ----- Uses the line search algorithm to enforce strong Wolfe conditions. See Wright and Nocedal, 'Numerical Optimization', 1999, pg. 59-60. For the zoom phase it uses an algorithm by [...]. """ if phi0 is None: phi0 = phi(0.) if derphi0 is None and derphi is not None: derphi0 = derphi(0.) alpha0 = 0 if old_phi0 is not None and derphi0 != 0: alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0) else: alpha1 = 1.0 if alpha1 < 0: alpha1 = 1.0 if alpha1 == 0: # This shouldn't happen. Perhaps the increment has slipped below # machine precision? For now, set the return variables skip the # useless while loop, and raise warnflag=2 due to possible imprecision. alpha_star = None phi_star = phi0 phi0 = old_phi0 derphi_star = None phi_a1 = phi(alpha1) #derphi_a1 = derphi(alpha1) evaluated below phi_a0 = phi0 derphi_a0 = derphi0 i = 1 maxiter = 10 for i in xrange(maxiter): if alpha1 == 0: break if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \ ((phi_a1 >= phi_a0) and (i > 1)): alpha_star, phi_star, derphi_star = \ _zoom(alpha0, alpha1, phi_a0, phi_a1, derphi_a0, phi, derphi, phi0, derphi0, c1, c2) break derphi_a1 = derphi(alpha1) if (abs(derphi_a1) <= -c2*derphi0): alpha_star = alpha1 phi_star = phi_a1 derphi_star = derphi_a1 break if (derphi_a1 >= 0): alpha_star, phi_star, derphi_star = \ _zoom(alpha1, alpha0, phi_a1, phi_a0, derphi_a1, phi, derphi, phi0, derphi0, c1, c2) break alpha2 = 2 * alpha1 # increase by factor of two on each iteration i = i + 1 alpha0 = alpha1 alpha1 = alpha2 phi_a0 = phi_a1 phi_a1 = phi(alpha1) derphi_a0 = derphi_a1 else: # stopping test maxiter reached alpha_star = alpha1 phi_star = phi_a1 derphi_star = None warn('The line search algorithm did not converge', LineSearchWarning) return alpha_star, phi_star, phi0, derphi_star def _cubicmin(a, fa, fpa, b, fb, c, fc): """ Finds the minimizer for a cubic polynomial that goes through the points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa. If no minimizer can be found return None """ # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D with np.errstate(divide='raise', over='raise', invalid='raise'): try: C = fpa db = b - a dc = c - a denom = (db * dc) ** 2 * (db - dc) d1 = np.empty((2, 2)) d1[0, 0] = dc ** 2 d1[0, 1] = -db ** 2 d1[1, 0] = -dc ** 3 d1[1, 1] = db ** 3 [A, B] = np.dot(d1, np.asarray([fb - fa - C * db, fc - fa - C * dc]).flatten()) A /= denom B /= denom radical = B * B - 3 * A * C xmin = a + (-B + np.sqrt(radical)) / (3 * A) except ArithmeticError: return None if not np.isfinite(xmin): return None return xmin def _quadmin(a, fa, fpa, b, fb): """ Finds the minimizer for a quadratic polynomial that goes through the points (a,fa), (b,fb) with derivative at a of fpa, """ # f(x) = B*(x-a)^2 + C*(x-a) + D with np.errstate(divide='raise', over='raise', invalid='raise'): try: D = fa C = fpa db = b - a * 1.0 B = (fb - D - C * db) / (db * db) xmin = a - C / (2.0 * B) except ArithmeticError: return None if not np.isfinite(xmin): return None return xmin def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo, phi, derphi, phi0, derphi0, c1, c2): """ Part of the optimization algorithm in `scalar_search_wolfe2`. """ maxiter = 10 i = 0 delta1 = 0.2 # cubic interpolant check delta2 = 0.1 # quadratic interpolant check phi_rec = phi0 a_rec = 0 while True: # interpolate to find a trial step length between a_lo and # a_hi Need to choose interpolation here. Use cubic # interpolation and then if the result is within delta * # dalpha or outside of the interval bounded by a_lo or a_hi # then use quadratic interpolation, if the result is still too # close, then use bisection dalpha = a_hi - a_lo if dalpha < 0: a, b = a_hi, a_lo else: a, b = a_lo, a_hi # minimizer of cubic interpolant # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi) # # if the result is too close to the end points (or out of the # interval) then use quadratic interpolation with phi_lo, # derphi_lo and phi_hi if the result is stil too close to the # end points (or out of the interval) then use bisection if (i > 0): cchk = delta1 * dalpha a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi, a_rec, phi_rec) if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk): qchk = delta2 * dalpha a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi) if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk): a_j = a_lo + 0.5*dalpha # Check new value of a_j phi_aj = phi(a_j) if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo): phi_rec = phi_hi a_rec = a_hi a_hi = a_j phi_hi = phi_aj else: derphi_aj = derphi(a_j) if abs(derphi_aj) <= -c2*derphi0: a_star = a_j val_star = phi_aj valprime_star = derphi_aj break if derphi_aj*(a_hi - a_lo) >= 0: phi_rec = phi_hi a_rec = a_hi a_hi = a_lo phi_hi = phi_lo else: phi_rec = phi_lo a_rec = a_lo a_lo = a_j phi_lo = phi_aj derphi_lo = derphi_aj i += 1 if (i > maxiter): # Failed to find a conforming step size a_star = None val_star = None valprime_star = None break return a_star, val_star, valprime_star #------------------------------------------------------------------------------ # Armijo line and scalar searches #------------------------------------------------------------------------------ def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1): """Minimize over alpha, the function ``f(xk+alpha pk)``. Parameters ---------- f : callable Function to be minimized. xk : array_like Current point. pk : array_like Search direction. gfk : array_like Gradient of `f` at point `xk`. old_fval : float Value of `f` at point `xk`. args : tuple, optional Optional arguments. c1 : float, optional Value to control stopping criterion. alpha0 : scalar, optional Value of `alpha` at start of the optimization. Returns ------- alpha f_count f_val_at_alpha Notes ----- Uses the interpolation algorithm (Armijo backtracking) as suggested by Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57 """ xk = np.atleast_1d(xk) fc = [0] def phi(alpha1): fc[0] += 1 return f(xk + alpha1*pk, *args) if old_fval is None: phi0 = phi(0.) else: phi0 = old_fval # compute f(xk) -- done in past loop derphi0 = np.dot(gfk, pk) alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1, alpha0=alpha0) return alpha, fc[0], phi1 def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1): """ Compatibility wrapper for `line_search_armijo` """ r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1, alpha0=alpha0) return r[0], r[1], 0, r[2] def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0): """Minimize over alpha, the function ``phi(alpha)``. Uses the interpolation algorithm (Armijo backtracking) as suggested by Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57 alpha > 0 is assumed to be a descent direction. Returns ------- alpha phi1 """ phi_a0 = phi(alpha0) if phi_a0 <= phi0 + c1*alpha0*derphi0: return alpha0, phi_a0 # Otherwise compute the minimizer of a quadratic interpolant: alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0) phi_a1 = phi(alpha1) if (phi_a1 <= phi0 + c1*alpha1*derphi0): return alpha1, phi_a1 # Otherwise loop with cubic interpolation until we find an alpha which # satifies the first Wolfe condition (since we are backtracking, we will # assume that the value of alpha is not too small and satisfies the second # condition. while alpha1 > amin: # we are assuming alpha>0 is a descent direction factor = alpha0**2 * alpha1**2 * (alpha1-alpha0) a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \ alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0) a = a / factor b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \ alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0) b = b / factor alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a) phi_a2 = phi(alpha2) if (phi_a2 <= phi0 + c1*alpha2*derphi0): return alpha2, phi_a2 if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96: alpha2 = alpha1 / 2.0 alpha0 = alpha1 alpha1 = alpha2 phi_a0 = phi_a1 phi_a1 = phi_a2 # Failed to find a suitable step length return None, phi_a1 #------------------------------------------------------------------------------ # Non-monotone line search for DF-SANE #------------------------------------------------------------------------------ def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta, gamma=1e-4, tau_min=0.1, tau_max=0.5): """ Nonmonotone backtracking line search as described in [1]_ Parameters ---------- f : callable Function returning a tuple ``(f, F)`` where ``f`` is the value of a merit function and ``F`` the residual. x_k : ndarray Initial position d : ndarray Search direction prev_fs : float List of previous merit function values. Should have ``len(prev_fs) <= M`` where ``M`` is the nonmonotonicity window parameter. eta : float Allowed merit function increase, see [1]_ gamma, tau_min, tau_max : float, optional Search parameters, see [1]_ Returns ------- alpha : float Step length xp : ndarray Next position fp : float Merit function value at next position Fp : ndarray Residual at next position References ---------- [1] "Spectral residual method without gradient information for solving large-scale nonlinear systems of equations." W. La Cruz, J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006). """ f_k = prev_fs[-1] f_bar = max(prev_fs) alpha_p = 1 alpha_m = 1 alpha = 1 while True: xp = x_k + alpha_p * d fp, Fp = f(xp) if fp <= f_bar + eta - gamma * alpha_p**2 * f_k: alpha = alpha_p break alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k) xp = x_k - alpha_m * d fp, Fp = f(xp) if fp <= f_bar + eta - gamma * alpha_m**2 * f_k: alpha = -alpha_m break alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k) alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p) alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m) return alpha, xp, fp, Fp def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta, gamma=1e-4, tau_min=0.1, tau_max=0.5, nu=0.85): """ Nonmonotone line search from [1] Parameters ---------- f : callable Function returning a tuple ``(f, F)`` where ``f`` is the value of a merit function and ``F`` the residual. x_k : ndarray Initial position d : ndarray Search direction f_k : float Initial merit function value C, Q : float Control parameters. On the first iteration, give values Q=1.0, C=f_k eta : float Allowed merit function increase, see [1]_ nu, gamma, tau_min, tau_max : float, optional Search parameters, see [1]_ Returns ------- alpha : float Step length xp : ndarray Next position fp : float Merit function value at next position Fp : ndarray Residual at next position C : float New value for the control parameter C Q : float New value for the control parameter Q References ---------- .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line search and its application to the spectral residual method'', IMA J. Numer. Anal. 29, 814 (2009). """ alpha_p = 1 alpha_m = 1 alpha = 1 while True: xp = x_k + alpha_p * d fp, Fp = f(xp) if fp <= C + eta - gamma * alpha_p**2 * f_k: alpha = alpha_p break alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k) xp = x_k - alpha_m * d fp, Fp = f(xp) if fp <= C + eta - gamma * alpha_m**2 * f_k: alpha = -alpha_m break alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k) alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p) alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m) # Update C and Q Q_next = nu * Q + 1 C = (nu * Q * (C + eta) + fp) / Q_next Q = Q_next return alpha, xp, fp, Fp, C, Q
Edit
Rename
Chmod
Delete
FILE
FOLDER
Name
Size
Permission
Action
__pycache__
---
0755
_lsq
---
0755
tests
---
0755
__init__.py
6526 bytes
0644
_basinhopping.py
26053 bytes
0644
_cobyla.cpython-35m-x86_64-linux-gnu.so
128896 bytes
0755
_differentialevolution.py
32558 bytes
0644
_group_columns.cpython-35m-x86_64-linux-gnu.so
141704 bytes
0755
_hungarian.py
9379 bytes
0644
_lbfgsb.cpython-35m-x86_64-linux-gnu.so
133600 bytes
0755
_linprog.py
37305 bytes
0644
_minimize.py
26435 bytes
0644
_minpack.cpython-35m-x86_64-linux-gnu.so
131488 bytes
0755
_nnls.cpython-35m-x86_64-linux-gnu.so
46656 bytes
0755
_numdiff.py
21209 bytes
0644
_root.py
26007 bytes
0644
_slsqp.cpython-35m-x86_64-linux-gnu.so
100568 bytes
0755
_spectral.py
7986 bytes
0644
_trustregion.py
8598 bytes
0644
_trustregion_dogleg.py
4449 bytes
0644
_trustregion_ncg.py
4646 bytes
0644
_tstutils.py
1322 bytes
0644
_zeros.cpython-35m-x86_64-linux-gnu.so
15760 bytes
0755
cobyla.py
9915 bytes
0644
lbfgsb.py
18069 bytes
0644
linesearch.py
24201 bytes
0644
minpack.py
30534 bytes
0644
minpack2.cpython-35m-x86_64-linux-gnu.so
47448 bytes
0755
moduleTNC.cpython-35m-x86_64-linux-gnu.so
40336 bytes
0755
nnls.py
1423 bytes
0644
nonlin.py
46681 bytes
0644
optimize.py
101006 bytes
0644
setup.py
3267 bytes
0644
slsqp.py
17983 bytes
0644
tnc.py
16533 bytes
0644
zeros.py
19912 bytes
0644
N4ST4R_ID | Naxtarrr