Source code for pathsim.optim.value

#########################################################################################
##
##                   DUAL NUMBER DEFINITION FOR AUTOMATIC DIFFERENTIATION  
##                                   (optim.value.py)
##
##                                   Milan Rother 2024
##
#########################################################################################

# IMPORTS ===============================================================================

import numpy as np
import functools

from collections import defaultdict


# HELPER FUNCTIONS ======================================================================

[docs] def der(arr, val): """Compute the derivative of an array of 'Value' objects with respect to 'val', fallback to scalar case and non-'Value' objects Parameters ---------- arr : array[Value] array or list of Values val : Value dual number value for AD Returns ------- der : array[float] partial derivatives w.r.t. the 'val' """ return np.array([a(val) if isinstance(a, Value) else 0.0 for a in np.atleast_1d(arr)])
[docs] def jac(arr, vals): """Compute the derivative of an array of 'Value' objects with respect to each 'Value' object in 'vals', fallback to scalars in both cases. This effectively constructs the jacobian. Parameters ---------- arr : array[Value] array or list of Values vals : array[Value] array or list of Values Returns ------- jac : array[float] partial derivatives w.r.t. all values in 'vals', effectively the jacobian """ return np.array([der(arr, val) for val in np.atleast_1d(vals)]).T
[docs] def autojac(fnc): """Decorator that wraps a function such that it computes its jacobian alongside its evaluaiton. Parameters ---------- fnc : callable function to wrap and compute jacobian of """ @functools.wraps(fnc) def wrap(*args): vals = Value.array(args) out = fnc(*vals) return Value.numeric(out), jac(out, vals) return wrap
# FUNCTION GRADIENTS ==================================================================== FUNC_GRAD = { # Trigonometric functions np.sin : lambda x: np.cos(x), np.cos : lambda x: -np.sin(x), np.tan : lambda x: 1/np.cos(x)**2, np.arcsin : lambda x: 1/np.sqrt(1 - x**2), np.arccos : lambda x: -1/np.sqrt(1 - x**2), np.arctan : lambda x: 1/(1 + x**2), np.sinh : lambda x: np.cosh(x), np.cosh : lambda x: np.sinh(x), np.tanh : lambda x: 1 - np.tanh(x)**2, np.arcsinh : lambda x: 1/np.sqrt(x**2 + 1), np.arccosh : lambda x: 1/(np.sqrt(x - 1)*np.sqrt(x + 1)), np.arctanh : lambda x: 1/(1 - x**2), # Exponential and logarithmic functions np.exp : lambda x: np.exp(x), np.exp2 : lambda x: np.exp2(x)*np.log(2), np.log : lambda x: 1/x, np.log2 : lambda x: 1/(x*np.log(2)), np.log10 : lambda x: 1/(x*np.log(10)), np.log1p : lambda x: 1/(1 + x), np.expm1 : lambda x: np.exp(x), # Power functions np.sqrt : lambda x: 0.5/np.sqrt(x), np.square : lambda x: 2*x, np.power : lambda x, p: p*np.power(x, p-1), np.cbrt : lambda x: 1/(3*np.cbrt(x)**2), # Complex functions np.conj : lambda x: np.conj(x), np.abs : lambda x: x/np.abs(x), np.angle : lambda x: -1j/x, # Statistical functions np.sign : lambda x: 0.0, } # WRAPPER THAT ADDS ADDITIONAL METHODS ==================================================
[docs] def add_funcs(cls): """Decorator that adds numpy functions as methods to the Value class while overloading them with their analytical derivatives to propagate the gradient with the chain rule. """ def create_method(fnc, grd): @functools.wraps(fnc) def method(self, *args, **kwargs): grad_val = grd(self.val, *args, **kwargs) return cls( val=fnc(self.val, *args, **kwargs), grad=defaultdict( float, {k: grad_val * v for k, v in self.grad.items()} ) ) return method # Add methods to class for fnc, grd in FUNC_GRAD.items(): if not hasattr(cls, fnc.__name__): setattr(cls, fnc.__name__, create_method(fnc, grd)) return cls
# CLASS DEFINITION ======================================================================
[docs] @add_funcs class Value: """Dual number 'Value' definition for small autograd framework. The dunder methods of the 'Value' class are overloaded to simultaneously compute the partial derivatives with respect to the instance itself and other instances of the value class. This is realized by a dictionary that handles the reference tracking via the id of the 'Value' instances. Example ------- Automatic differentiation of numpy functions: .. code-block:: python import numpy as np v = Value(3) #AD w = np.sin(v) #reference dw_dv = np.cos(v) print(dw_dv == w(v)) #True Automatic differentiation of concatenations: .. code-block:: python import numpy as np v = Value(2) #AD w = np.exp(np.sin(v) + v**3) #reference dw_dv = np.exp(np.sin(v) + v**3) * (np.cos(v) + 3*v**2) print(dw_dv == w(v)) #True Automatic differentiation with multiple values: .. code-block:: python import numpy as np u = Value(0.5) v = Value(2) #AD w = u * v + np.sin(u + v) #references dw_du = v + np.cos(u + v) dw_dv = u + np.cos(u + v) print(dw_du == w(u), dw_dv == w(v)) #True True Parameters ---------- val : float, int, complex The numerical value. grad : defaultdict, None The gradient dictionary. If None, initializes with self derivative. Attributes ---------- _id : int id for reference tracking in gradient dict """ #restrict attributes, makes access faster __slots__ = ["val", "grad", "_id"] def __init__(self, val=0.0, grad=None): #catch instantiating with Value if isinstance(val, Value): self.val = val.val else: self.val = val #initialize fresh gradients if grad is None: self._id = id(self) self.grad = defaultdict(float, {self._id: 1.0}) else: self._id = None self.grad = grad # dynamic properties ------------------------------------------------------------------------ @property def real(self): return Value( val=np.real(self.val), grad=defaultdict(float, {k: np.real(v) for k, v in self.grad.items()}) ) @property def imag(self): return Value( val=np.imag(self.val), grad=defaultdict(float, {k: np.imag(v) for k, v in self.grad.items()}) ) # array conversions -------------------------------------------------------------------------
[docs] @classmethod def numeric(cls, arr): """Cast an array with value objects to an array of numeric values. Numeric entries are just passed through. Parameters ---------- arr : array[obj] array of mixed value, numeric objects Returns ------- numeric : array[numeric] array of numeric values """ _arr = np.atleast_1d(arr) return np.array([a.val if isinstance(a, cls) else a for a in _arr.ravel()]).reshape(_arr.shape).squeeze()
[docs] @classmethod def array(cls, arr): """Cast an array or list to an array of value objects. For Value entries, their numeric values are used to create a new Value instance. Example ------- Instantiate multiple values from 1d array: .. code-block:: python a, b, c = Value.array([1, 2, 3]) Instantiate numpy array filled with values: .. code-block:: python import numpy as np A_num = np.random.rand(3, 5) A = Value.array(A_num) Parameters ---------- arr : array[obj] array of mixed value, numeric objects Returns ------- array : array[Value] array of Value objects """ _arr = np.atleast_1d(arr) return np.array([cls(a) for a in _arr.ravel()]).reshape(_arr.shape).squeeze()
# convenience static methods ----------------------------------------------------------------
[docs] @staticmethod def der(arr, val): return der(arr, val)
[docs] @staticmethod def jac(arr, vals): return jac(arr, vals)
# overload builtins ------------------------------------------------------------------------- def __call__(self, other): """ Get the partial derivative with respect to 'other'. Parameters ---------- other : Value variable with respect to which to take the derivative Returns ------- out : float The partial derivative value """ if isinstance(other, Value): return self.grad[other._id] else: return 0.0 def __hash__(self): return id(self) def __iter__(self): yield self def __len__(self): return len(self.grad) def __repr__(self): return f"Value(val={self.val}, grad={self.grad})" # comparison operators ---------------------------------------------------------------------- def __eq__(self, other): if isinstance(other, Value): return self.val == other.val else: return self.val == other def __ne__(self, other): if isinstance(other, Value): return self.val != other.val else: return self.val != other def __lt__(self, other): if isinstance(other, Value): return self.val < other.val else: return self.val < other def __gt__(self, other): if isinstance(other, Value): return self.val > other.val else: return self.val > other def __le__(self, other): if isinstance(other, Value): return self.val <= other.val else: return self.val <= other def __ge__(self, other): if isinstance(other, Value): return self.val >= other.val else: return self.val >= other # type casting operators -------------------------------------------------------------------- def __bool__(self): return bool(self.val) def __int__(self): return int(self.val) def __float__(self): return float(self.val) def __complex__(self): return complex(self.val) # unary operators --------------------------------------------------------------------------- def __pos__(self): return self def __neg__(self): return Value( val=-self.val, grad=defaultdict(float, {k: -v for k, v in self.grad.items()}) ) def __abs__(self): return Value( val=abs(self.val), grad=defaultdict(float, {k: v * np.sign(self.val) for k, v in self.grad.items()}) ) # arithmetic operators ---------------------------------------------------------------------- def __add__(self, other): if isinstance(other, Value): new_grad = defaultdict(float, self.grad) for k, v in other.grad.items(): new_grad[k] += v return Value( val=self.val + other.val, grad=new_grad ) elif isinstance(other, np.ndarray): return np.array([self + x for x in other]) else: return Value( val=self.val + other, grad=self.grad ) def __radd__(self, other): return self + other def __iadd__(self, other): if isinstance(other, Value): self.val += other.val for k, v in other.grad.items(): self.grad[k] += v return self else: self.val += other return self def __mul__(self, other): if isinstance(other, Value): new_grad = defaultdict(float) for k, v in self.grad.items(): new_grad[k] += v * other.val for k, v in other.grad.items(): new_grad[k] += v * self.val return Value( val=self.val * other.val, grad=new_grad ) elif isinstance(other, np.ndarray): return np.array([self * x for x in other]) else: return Value( val=self.val * other, grad=defaultdict(float, {k: v * other for k, v in self.grad.items()}) ) def __rmul__(self, other): return self * other def __imul__(self, other): if isinstance(other, Value): self.val *= other.val for k, v in self.grad.items(): self.grad[k] *= other.val for k, v in other.grad.items(): self.grad[k] += v * self.val return self else: self.val *= other return self def __sub__(self, other): if isinstance(other, Value): new_grad = defaultdict(float, self.grad) for k, v in other.grad.items(): new_grad[k] -= v return Value( val=self.val - other.val, grad=new_grad ) elif isinstance(other, np.ndarray): return np.array([self - x for x in other]) else: return Value( val=self.val - other, grad=self.grad ) def __rsub__(self, other): return -(self - other) def __isub__(self, other): if isinstance(other, Value): self.val -= other.val for k, v in other.grad.items(): self.grad[k] -= v return self else: self.val -= other return self def __truediv__(self, other): if isinstance(other, Value): new_grad = defaultdict(float) if other.val != 0.0: a, b = self.val/other.val**2, 1/other.val for k, v in self.grad.items(): new_grad[k] += v*b for k, v in other.grad.items(): new_grad[k] -= v*a return Value( val=self.val / other.val, grad=new_grad ) if isinstance(other, np.ndarray): return np.array([self / x for x in other]) else: return Value( val=self.val / other, grad=defaultdict(float, {k: v / other for k, v in self.grad.items()}) ) def __rtruediv__(self, other): if isinstance(other, Value): return other / self else: new_grad = defaultdict(float) if self.val != 0.0: a = -other / (self.val ** 2) for k, v in self.grad.items(): new_grad[k] += v*a return Value( val=other / self.val, grad=new_grad ) def __pow__(self, power): if isinstance(power, Value): new_val = self.val ** power.val new_grad = defaultdict(float) a, b = new_val * power.val / self.val, new_val * np.log(self.val) for k, v in self.grad.items(): new_grad[k] += v*a for k, v in power.grad.items(): new_grad[k] += v*b return Value( val=new_val, grad=new_grad ) else: return Value( val=self.val ** power, grad=defaultdict(float, {k: power * (self.val ** (power - 1)) * v for k, v in self.grad.items()}) ) def __rpow__(self, base): new_val = base ** self.val return Value( val=new_val, grad=defaultdict(float, {k: new_val * np.log(base) * v for k, v in self.grad.items()}) )