import plotly.io as pio; pio.renderers.default = "notebook_connected"
Lecture 15 – CS 189, Fall 2025
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly import figure_factory as ff
from plotly.subplots import make_subplots
colors = px.colors.qualitative.Plotly
px.defaults.width = 800
# from ipywidgets import HBox
import numpy as np
Numerical Differentiation
Defining the function f that computes the expression
$$
f(x_1, x_2) = x_1 x_2 + e^{x_1 x_2} - \sin(x_2)
$$
def f(x1, x2):
return x1 * x2 + np.exp(x1 * x2) - np.sin(x2)
print("f(1,2)", f(1,2))
f(1,2) 8.47975867210497
x1, x2 = np.meshgrid(np.linspace(-1, 1, 100), np.linspace(-1, 1, 100))
z = f(x1, x2)
fig = go.Figure()
fig.add_surface(x=x1, y=x2, z=z, colorscale="viridis")
fig.update_layout(
title="Surface Plot of f(x1, x2) = x1 * x2 + exp(x1 * x2) - sin(x2)",
width=800, height=600,
scene=dict(
xaxis_title='x1', yaxis_title='x2', zaxis_title='f(x1, x2)',
aspectmode='cube'
)
)
Numerical differentiation is a technique used to approximate the derivative of a function at a given point. It is particularly useful when the function is complex or not easily differentiable analytically.
Computing the gradient numerically using central differences (accurate to second order in the step size h):
$$ \frac{\partial f}{\partial x_1} \approx \frac{f(x_1 + h, x_2) - f(x_1 - h, x_2)}{2h}, \qquad \frac{\partial f}{\partial x_2} \approx \frac{f(x_1, x_2 + h) - f(x_1, x_2 - h)}{2h} $$
def numeric_gradient(f, x1, x2, h=1e-8):
"""
Compute the numerical derivative of f with respect to x1 at (x1, x2)
using central difference.
"""
return [(f(x1 + h, x2) - f(x1 - h, x2)) / (2 * h),
(f(x1, x2 + h) - f(x1, x2 - h)) / (2 * h)]
numeric_gradient(f, 1, 2)
[np.float64(16.778112232884723), np.float64(8.80520287793729)]
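The step size h trades truncation error against floating-point round-off. A quick sketch (the step sizes and the helper name exact_dfdx1 below are just for illustration) probes this by comparing the central-difference estimate of the first partial at (1, 2) against the hand-derived value x2*exp(x1*x2) + x2 = 2*e**2 + 2:
# Probe how the central-difference estimate of df/dx1 at (1, 2) varies with h.
# Reference value: the hand-derived partial x2*exp(x1*x2) + x2 evaluated at (1, 2).
exact_dfdx1 = 2 * np.exp(2) + 2
for h in [1e-2, 1e-4, 1e-6, 1e-8, 1e-10]:
    approx = numeric_gradient(f, 1, 2, h=h)[0]
    print(f"h={h:.0e}  estimate={approx:.12f}  abs error={abs(approx - exact_dfdx1):.2e}")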
Symbolic Differentiation
We can also derive the gradient using a symbolic algebra library. This is a powerful technique that allows us to compute the gradient of a function without having to derive it by hand. We will use the sympy library to do this.
Defining the function f that computes the expression
$$
f(x_1, x_2) = x_1 x_2 + e^{x_1 x_2} - \sin(x_2)
$$
import sympy as sp
# define our symbols
x1, x2 = sp.symbols('x1 x2')
# Define a symbolic expression E for f(x1, x2)
E = x1 * x2 + sp.exp(x1 * x2) - sp.sin(x2)
# Compute the gradient of E with respect to x1 and x2
gE = [sp.diff(E, var) for var in (x1, x2)]
gE
[x2*exp(x1*x2) + x2, x1*exp(x1*x2) + x1 - cos(x2)]
We can use the symbolic representation to generate a numerical (NumPy) gradient function using sympy's lambdify.
gEfun = sp.lambdify((x1, x2), gE)
gEfun(1,2)
[np.float64(16.7781121978613), np.float64(8.805202935477793)]
numeric_gradient(f, 1, 2)
[np.float64(16.778112232884723), np.float64(8.80520287793729)]
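Both approaches agree with the hand-derived gradient evaluated at (1, 2):
$$
\nabla f(1, 2) = \begin{bmatrix} x_2 e^{x_1 x_2} + x_2 \\ x_1 e^{x_1 x_2} + x_1 - \cos(x_2) \end{bmatrix}_{(1,2)} = \begin{bmatrix} 2e^{2} + 2 \\ e^{2} + 1 - \cos(2) \end{bmatrix} \approx \begin{bmatrix} 16.7781 \\ 8.8052 \end{bmatrix}
$$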
Backpropagation Example
class Scalar:
    def __init__(self, name, value, parents=()):
        self.name = name
        self.value = value
        self.parents = parents  # sequence of (parent Scalar, d self / d parent) pairs
def __add__(self, other):
"""Addition operator: self + other"""
if isinstance(other, (int, float)):
return Scalar(f"({self.name} + {other})",
self.value + other,
[(self, 1.0)])
elif isinstance(other, Scalar):
return Scalar(f"({self.name} + {other.name})",
self.value + other.value,
[(self, 1.0), (other, 1.0)])
else:
raise TypeError(f"Unsupported type for addition: {type(other)}")
def __radd__(self, other):
"""Right addition operator: other + self"""
return self.__add__(other)
def __sub__(self, other):
"""Subtraction operator: self - other"""
if isinstance(other, (int, float)):
return Scalar(f"({self.name} - {other})",
self.value - other,
[(self, 1.0)])
elif isinstance(other, Scalar):
return Scalar(f"({self.name} - {other.name})",
self.value - other.value,
[(self, 1.0), (other, -1.0)])
else:
raise TypeError(f"Unsupported type for subtraction: {type(other)}")
def __rsub__(self, other):
"""Right subtraction operator: other - self"""
if isinstance(other, (int, float)):
return Scalar(f"({other} - {self.name})",
other - self.value,
[(self, -1.0)])
else:
raise TypeError(f"Unsupported type for subtraction: {type(other)}")
def __mul__(self, other):
"""Multiplication operator: self * other"""
if isinstance(other, (int, float)):
return Scalar(f"({self.name} * {other})",
self.value * other,
[(self, other)])
elif isinstance(other, Scalar):
return Scalar(f"({self.name} * {other.name})",
self.value * other.value,
[(self, other.value), (other, self.value)])
else:
raise TypeError(f"Unsupported type for multiplication: {type(other)}")
def __rmul__(self, other):
"""Right multiplication operator: other * self"""
return self.__mul__(other)
def __truediv__(self, other):
"""Division operator: self / other"""
if isinstance(other, (int, float)):
if other == 0:
raise ValueError("Division by zero")
return Scalar(f"({self.name} / {other})",
self.value / other,
[(self, 1.0 / other)])
elif isinstance(other, Scalar):
if other.value == 0:
raise ValueError("Division by zero")
return Scalar(f"({self.name} / {other.name})",
self.value / other.value,
[(self, 1.0 / other.value),
(other, -self.value / (other.value ** 2))])
else:
raise TypeError(f"Unsupported type for division: {type(other)}")
def __rtruediv__(self, other):
"""Right division operator: other / self"""
if not isinstance(other, (int, float)):
raise TypeError(f"Unsupported type for division: {type(other)}")
if self.value == 0:
raise ValueError("Division by zero")
# For c/f(x), derivative is -c*f'(x)/f(x)²
return Scalar(f"({other} / {self.name})",
other / self.value,
[(self, -other / (self.value ** 2))])
def __neg__(self):
"""Unary negative operator: -self"""
return Scalar(f"(-{self.name})",
-self.value,
[(self, -1.0)])
def exp(x):
"""Exponential function"""
if not isinstance(x, Scalar):
raise TypeError(f"Unsupported type for exp: {type(x)}")
return Scalar(f"exp({x.name})", np.exp(x.value), [(x, np.exp(x.value))])
def sin(x):
"""Sine function"""
if not isinstance(x, Scalar):
raise TypeError(f"Unsupported type for sin: {type(x)}")
return Scalar(f"sin({x.name})", np.sin(x.value), [(x, np.cos(x.value))])
def ln(x):
"""Natural logarithm function"""
if not isinstance(x, Scalar):
raise TypeError(f"Unsupported type for ln: {type(x)}")
if x.value <= 0:
raise ValueError("Natural logarithm is only defined for positive values")
return Scalar(f"ln({x.name})", np.log(x.value), [(x, 1.0 / x.value)])
def __repr__(self):
return self.__str__()
def __str__(self):
parents_str = ', '.join([p.name for (p,g) in self.parents])
return f"Scalar(name='{self.name}', value={self.value}, parents={parents_str})"
x1 = Scalar("x", 1.0)
x2 = Scalar("y", 2.0)
E = x1 * x2 + Scalar.exp(x1 * x2) - Scalar.sin(x2)
E
Scalar(name='(((x * y) + exp((x * y))) - sin(y))', value=8.47975867210497, parents=((x * y) + exp((x * y))), sin(y))
def backward(final_node):
from collections import defaultdict
# construct a topological ordering of the nodes
# so we can process them in reverse order
def topological_sort(node, visited, sorted_nodes):
if node in visited: return
visited.add(node)
for parent, _ in node.parents:
topological_sort(parent, visited, sorted_nodes)
sorted_nodes.append(node)
sorted_nodes = []
topological_sort(final_node, set(), sorted_nodes)
sorted_nodes.reverse() # process from output to inputs
# Compute the adjoints
adjoint = defaultdict(float)
adjoint[final_node] = 1.0 # d final_node / d final_node = 1
for node in sorted_nodes:
# Because we are going in reverse topological order,
# the adjoint of the current node is already computed
for parent, grad in node.parents:
adjoint[parent] += adjoint[node] * grad
return adjoint
adjoints = backward(E)
adjoints[x1], adjoints[x2]
(np.float64(16.7781121978613), np.float64(8.805202935477793))
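As a quick sanity check (a sketch reusing the numeric_gradient function from earlier; the name num_grad below is just for illustration), the reverse-mode adjoints should match the finite-difference estimates up to the step-size error:
# Compare the reverse-mode adjoints against the central-difference estimates.
num_grad = numeric_gradient(f, 1, 2)
print("backprop:", adjoints[x1], adjoints[x2])
print("numeric: ", num_grad)
print("match?   ", np.allclose([adjoints[x1], adjoints[x2]], num_grad))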
Don't Forget the Topological Sort!
Here is a more "intuitive" implementation of backpropagation. It skips the topological sort and simply propagates adjoints along a work queue, so a node may pass its adjoint to its parents before all of the contributions to its own adjoint have been accumulated. It happens to give the correct result for E above, but it does not work in general, as the second example below shows.
def invalid_backward(final_node):
from collections import defaultdict
queue = [final_node]
visited = set()
adjoint = defaultdict(float)
adjoint[final_node] = 1.0 # d final_node / d final_node = 1
while queue:
node = queue.pop()
if node in visited: continue
visited.add(node)
print(f"Visiting {node.name}, adjoint={adjoint[node]}")
for parent, grad in node.parents:
print(f" Propagating to {parent.name} with grad {grad}")
# This is wrong because the adjoint may not be completely computed yet
adjoint[parent] += grad * adjoint[node]
if parent not in visited:
queue.append(parent)
return adjoint
adjoint = invalid_backward(E)
adjoint[x1], adjoint[x2]
Visiting (((x * y) + exp((x * y))) - sin(y)), adjoint=1.0
 Propagating to ((x * y) + exp((x * y))) with grad 1.0
 Propagating to sin(y) with grad -1.0
Visiting sin(y), adjoint=-1.0
 Propagating to y with grad -0.4161468365471424
Visiting y, adjoint=0.4161468365471424
Visiting ((x * y) + exp((x * y))), adjoint=1.0
 Propagating to (x * y) with grad 1.0
 Propagating to exp((x * y)) with grad 1.0
Visiting exp((x * y)), adjoint=1.0
 Propagating to (x * y) with grad 7.38905609893065
Visiting (x * y), adjoint=7.38905609893065
 Propagating to x with grad 2.0
 Propagating to y with grad 1.0
Visiting x, adjoint=14.7781121978613
Visiting (x * y), adjoint=1.0
 Propagating to x with grad 2.0
 Propagating to y with grad 1.0
(np.float64(16.7781121978613), np.float64(8.805202935477793))
a = x1 * 2
b = 1/a
c = Scalar.ln(b)
d = c + a
d
Scalar(name='(ln((1 / (x * 2))) + (x * 2))', value=1.3068528194400546, parents=ln((1 / (x * 2))), (x * 2))
adjoints = backward(d)
adjoints[x1]
1.0
adjoints = invalid_backward(d)
adjoints[x1]
Visiting (ln((1 / (x * 2))) + (x * 2)), adjoint=1.0
 Propagating to ln((1 / (x * 2))) with grad 1.0
 Propagating to (x * 2) with grad 1.0
Visiting (x * 2), adjoint=1.0
 Propagating to x with grad 2
Visiting x, adjoint=2.0
Visiting ln((1 / (x * 2))), adjoint=1.0
 Propagating to (1 / (x * 2)) with grad 2.0
Visiting (1 / (x * 2)), adjoint=2.0
 Propagating to (x * 2) with grad -0.25
2.0
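To see why 2.0 is wrong, note that d = ln(1/(2x)) + 2x = -ln(2x) + 2x, so
$$ d'(x) = -\frac{1}{x} + 2, \qquad d'(1) = 1. $$
The unsorted version drops the contribution that flows through the ln branch: by the time (1 / (x * 2)) propagates grad -0.25 to (x * 2), that node has already been visited and never passes the update along to x.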
Forward Autodiff Example
Bonus material: forward-mode automatic differentiation. Rather than running a separate backward pass, forward mode propagates derivatives through the computation graph alongside the values: each intermediate quantity carries its gradient with respect to the inputs, and every operation updates value and gradient together.
We implement forward-mode autodiff by defining a custom class.
class ForwardVar:
def __init__(self, value, grad, name=None):
self.value = value
self.name = name
self.grad = grad
def __add__(self, other):
"""Addition operator: self + other"""
if not isinstance(other, (int, float, ForwardVar)):
raise TypeError(f"Unsupported type for addition: {type(other)}")
if isinstance(other, (int, float)):
return ForwardVar(self.value + other,
self.grad,
name=f"({self.name} + {other})")
else:
value = self.value + other.value
grad = np.zeros_like(self.grad)
for i in range(len(self.grad)):
grad[i] = self.grad[i] + other.grad[i]
return ForwardVar(value, grad, name=f"({self.name} + {other.name})")
def __radd__(self, other):
return self.__add__(other)
def __sub__(self, other):
"""Subtraction operator: self - other"""
if not isinstance(other, (int, float, ForwardVar)):
raise TypeError(f"Unsupported type for subtraction: {type(other)}")
if isinstance(other, (int, float)):
return ForwardVar(self.value - other,
self.grad,
name=f"({self.name} - {other})")
else:
value = self.value - other.value
grad = np.zeros_like(self.grad)
for i in range(len(self.grad)):
grad[i] = self.grad[i] - other.grad[i]
return ForwardVar(value, grad, name=f"({self.name} - {other.name})")
def __rsub__(self, other):
"""Right subtraction operator: other - self"""
if not isinstance(other, (int, float)):
raise TypeError(f"Unsupported type for subtraction: {type(other)}")
return ForwardVar(other - self.value,
-self.grad,
name=f"({other} - {self.name})")
def __mul__(self, other):
"""Multiplication operator: self * other"""
if not isinstance(other, (int, float, ForwardVar)):
raise TypeError(f"Unsupported type for multiplication: {type(other)}")
if isinstance(other, (int, float)):
return ForwardVar(self.value * other,
self.grad * other,
name=f"({self.name} * {other})")
else:
value = self.value * other.value
grad = np.zeros_like(self.grad)
for i in range(len(self.grad)):
grad[i] = self.grad[i] * other.value + self.value * other.grad[i]
return ForwardVar(value, grad, name=f"({self.name} * {other.name})")
def __rmul__(self, other):
"""Right multiplication operator: other * self"""
return self.__mul__(other)
def __truediv__(self, other):
"""Division operator: self / other"""
if not isinstance(other, (int, float, ForwardVar)):
raise TypeError(f"Unsupported type for division: {type(other)}")
if isinstance(other, (int, float)):
if other == 0:
raise ValueError("Division by zero")
return ForwardVar(self.value / other,
self.grad / other,
name=f"({self.name} / {other})")
else:
if other.value == 0:
raise ValueError("Division by zero")
# product rule: (u/v)' = (u * (1/v))' = u'/v - (u/v^2)*v'
value = self.value / other.value
grad = np.zeros_like(self.grad)
for i in range(len(self.grad)):
                grad[i] = self.grad[i] / other.value - (self.value / (other.value ** 2)) * other.grad[i]
return ForwardVar(value, grad, name=f"({self.name} / {other.name})")
def __rtruediv__(self, other):
"""Right division operator: other / self"""
if not isinstance(other, (int, float)):
raise TypeError(f"Unsupported type for right division: {type(other)}")
if self.value == 0:
raise ValueError("Division by zero")
# For c/f(x), derivative is -c*f'(x)/f(x)²
value = other / self.value
grad = np.zeros_like(self.grad)
for i in range(len(self.grad)):
grad[i] = -other * self.grad[i] / (self.value ** 2)
return ForwardVar(value, grad, name=f"({other} / {self.name})")
def __neg__(self):
"""Unary negative operator: -self"""
return ForwardVar(-self.value,
-self.grad,
name=f"(-{self.name})")
def __repr__(self):
return self.__str__()
# adding print support
def __str__(self):
return f"ForwardVar(\n\tvalue={self.value}, \n\tgrad={self.grad}, \n\tname='{self.name}')"
def forward_exp(x : ForwardVar):
value = np.exp(x.value)
grad = np.zeros_like(x.grad)
for i in range(len(x.grad)):
grad[i] = x.grad[i] * value
return ForwardVar(value, grad, name=f"exp({x.name})")
def forward_sin(x : ForwardVar):
value = np.sin(x.value)
grad = np.zeros_like(x.grad)
for i in range(len(x.grad)):
grad[i] = x.grad[i] * np.cos(x.value)
return ForwardVar(value, grad, name=f"sin({x.name})")
v1 = ForwardVar(1, np.array([1., 0.]), name="x1")
v2 = ForwardVar(2, np.array([0., 1.]), name="x2")
v1 * v2 + forward_exp(v1 * v2) - forward_sin(v2)
ForwardVar( value=8.47975867210497, grad=[16.7781122 8.80520294], name='(((x1 * x2) + exp((x1 * x2))) - sin(x2))')
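A nice property of forward mode: seeding the inputs with a direction vector instead of the standard basis computes the directional derivative (a Jacobian-vector product) in a single forward pass. A brief sketch using the ForwardVar class above (the direction [1, 3] and the names d1, d2, out are arbitrary choices for illustration):
# Seed each input with its component of the direction v = [1, 3]; a single
# forward pass then yields the directional derivative grad f(1, 2) . v.
d1 = ForwardVar(1, np.array([1.]), name="x1")
d2 = ForwardVar(2, np.array([3.]), name="x2")
out = d1 * d2 + forward_exp(d1 * d2) - forward_sin(d2)
print(out.grad)  # should equal 1 * df/dx1 + 3 * df/dx2 at (1, 2)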