In [1]:
import plotly.io as pio; pio.renderers.default = "notebook_connected"

Lecture 15 – CS 189, Fall 2025

In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly import figure_factory as ff
from plotly.subplots import make_subplots
colors = px.colors.qualitative.Plotly
px.defaults.width = 800
# from ipywidgets import HBox
import numpy as np

Numerical Differentiation

Defining the function f that computes the expression $$ f(x_1, x_2) = x_1 x_2 + e^{x_1 x_2} - \sin(x_2) $$

In [3]:
def f(x1, x2):
    return x1 * x2 + np.exp(x1 * x2) - np.sin(x2)
    
print("f(1,2)", f(1,2))
f(1,2) 8.47975867210497
In [4]:
x1, x2 = np.meshgrid(np.linspace(-1, 1, 100), np.linspace(-1, 1, 100))
z = f(x1, x2)   
fig = go.Figure()
fig.add_surface(x=x1, y=x2, z=z, colorscale="viridis")
fig.update_layout(
    title="Surface Plot of f(x1, x2) = x1 * x2 + exp(x1 * x2) - sin(x2)",
    width=800, height=600,
    scene=dict(
        xaxis_title='x1', yaxis_title='x2', zaxis_title='f(x1, x2)',
        aspectmode='cube'
    )
)

Numerical differentiation is a technique used to approximate the derivative of a function at a given point. It is particularly useful when the function is complex or not easily differentiable analytically.

Computing the gradient numerically using central finite differences:

$$ \frac{\partial f}{\partial x_1} \approx \frac{f(x_1 + h, x_2) - f(x_1 - h, x_2)}{2h}, \qquad \frac{\partial f}{\partial x_2} \approx \frac{f(x_1, x_2 + h) - f(x_1, x_2 - h)}{2h} $$

In [5]:
def numeric_gradient(f, x1, x2, h=1e-8):
    """
    Compute the numerical derivative of f with respect to x1 at (x1, x2)
    using central difference.
    """
    return [(f(x1 + h, x2) - f(x1 - h, x2)) / (2 * h), 
            (f(x1, x2 + h) - f(x1, x2 - h)) / (2 * h)]
In [6]:
numeric_gradient(f, 1, 2)
Out[6]:
[np.float64(16.778112232884723), np.float64(8.80520287793729)]

Symbolic Differentiation

We can also obtain the gradient with a symbolic algebra library, which computes exact derivatives so we don't have to work them out by hand. We will use the sympy library to do this.

Defining a symbolic expression for $$ f(x_1, x_2) = x_1 x_2 + e^{x_1 x_2} - \sin(x_2) $$

In [7]:
import sympy as sp
# define our symbols
x1, x2 = sp.symbols('x1 x2')
# Define a symbolic expression for f
E = x1 * x2 + sp.exp(x1 * x2) - sp.sin(x2)
# Compute the gradient of E with respect to x1 and x2
gE = [sp.diff(E, var) for var in (x1, x2)]
gE
Out[7]:
[x2*exp(x1*x2) + x2, x1*exp(x1*x2) + x1 - cos(x2)]

We can use sp.lambdify to convert the symbolic gradient into a numerical NumPy function.

In [8]:
gEfun = sp.lambdify((x1, x2), gE)
In [9]:
gEfun(1,2)
Out[9]:
[np.float64(16.7781121978613), np.float64(8.805202935477793)]
In [10]:
numeric_gradient(f, 1, 2)
Out[10]:
[np.float64(16.778112232884723), np.float64(8.80520287793729)]
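The symbolic and numerical gradients agree to roughly eight significant digits; the remaining discrepancy is floating-point roundoff in the finite-difference quotient (with h = 1e-8, the central difference's O(h^2) truncation error is negligible).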

Backpropagation Example

In [11]:
class Scalar:
    def __init__(self, name, value, parents=()):
        self.name = name
        self.value = value
        self.parents = parents  # pairs of (parent Scalar, d self / d parent)

    def __add__(self, other):
        """Addition operator: self + other"""
        if isinstance(other, (int, float)):
            return Scalar(f"({self.name} + {other})", 
                          self.value + other, 
                          [(self, 1.0)])
        elif isinstance(other, Scalar):
            return Scalar(f"({self.name} + {other.name})", 
                          self.value + other.value, 
                          [(self, 1.0), (other, 1.0)])
        else: 
            raise TypeError(f"Unsupported type for addition: {type(other)}")
           
    def __radd__(self, other):
        """Right addition operator: other + self"""
        return self.__add__(other) 
    
    def __sub__(self, other):
        """Subtraction operator: self - other"""
        if isinstance(other, (int, float)):
            return Scalar(f"({self.name} - {other})", 
                            self.value - other, 
                            [(self, 1.0)])
        elif isinstance(other, Scalar):
            return Scalar(f"({self.name} - {other.name})", 
                            self.value - other.value, 
                            [(self, 1.0), (other, -1.0)])
        else: 
            raise TypeError(f"Unsupported type for subtraction: {type(other)}")

    def __rsub__(self, other):
        """Right subtraction operator: other - self"""
        if isinstance(other, (int, float)):
            return Scalar(f"({other} - {self.name})",
                          other - self.value, 
                          [(self, -1.0)]) 
        else:
            raise TypeError(f"Unsupported type for subtraction: {type(other)}")

    def __mul__(self, other):
        """Multiplication operator: self * other"""
        if isinstance(other, (int, float)):
            return Scalar(f"({self.name} * {other})", 
                            self.value * other, 
                            [(self, other)])
        elif isinstance(other, Scalar):   
            return Scalar(f"({self.name} * {other.name})", 
                            self.value * other.value, 
                            [(self, other.value), (other, self.value)])
        else: 
            raise TypeError(f"Unsupported type for multiplication: {type(other)}")

    def __rmul__(self, other):
        """Right multiplication operator: other * self"""
        return self.__mul__(other)

    def __truediv__(self, other):
        """Division operator: self / other"""
        if isinstance(other, (int, float)):
            if other == 0: 
                raise ValueError("Division by zero")
            return Scalar(f"({self.name} / {other})", 
                            self.value / other, 
                            [(self, 1.0 / other)])
        elif isinstance(other, Scalar):
            if other.value == 0: 
                raise ValueError("Division by zero")
            return Scalar(f"({self.name} / {other.name})", 
                            self.value / other.value, 
                            [(self, 1.0 / other.value), 
                             (other, -self.value / (other.value ** 2))])
        else: 
            raise TypeError(f"Unsupported type for division: {type(other)}")


    def __rtruediv__(self, other):
        """Right division operator: other / self"""
        if not isinstance(other, (int, float)):
            raise TypeError(f"Unsupported type for division: {type(other)}")
        if self.value == 0:
            raise ValueError("Division by zero")
        # For c/f(x), derivative is -c*f'(x)/f(x)²
        return Scalar(f"({other} / {self.name})",
                        other / self.value, 
                        [(self, -other / (self.value ** 2))])

    def __neg__(self):
        """Unary negative operator: -self"""
        return Scalar(f"(-{self.name})",
                        -self.value, 
                        [(self, -1.0)])

    @staticmethod
    def exp(x):
        """Exponential function"""
        if not isinstance(x, Scalar):
            raise TypeError(f"Unsupported type for exp: {type(x)}")
        return Scalar(f"exp({x.name})", np.exp(x.value), [(x, np.exp(x.value))])
    
    @staticmethod
    def sin(x):
        """Sine function"""
        if not isinstance(x, Scalar):
            raise TypeError(f"Unsupported type for sin: {type(x)}")
        return Scalar(f"sin({x.name})", np.sin(x.value), [(x, np.cos(x.value))])
    
    @staticmethod
    def ln(x):
        """Natural logarithm function"""
        if not isinstance(x, Scalar):
            raise TypeError(f"Unsupported type for ln: {type(x)}")
        if x.value <= 0:
            raise ValueError("Natural logarithm is only defined for positive values")
        return Scalar(f"ln({x.name})", np.log(x.value), [(x, 1.0 / x.value)])
    
    def __repr__(self):
        return self.__str__()
    
    def __str__(self):
        parents_str = ', '.join([p.name for (p,g) in self.parents])
        return f"Scalar(name='{self.name}', value={self.value}, parents={parents_str})"
    
In [12]:
x1 = Scalar("x", 1.0)
x2 = Scalar("y", 2.0)
E = x1 * x2 + Scalar.exp(x1 * x2) - Scalar.sin(x2)
E
Out[12]:
Scalar(name='(((x * y) + exp((x * y))) - sin(y))', value=8.47975867210497, parents=((x * y) + exp((x * y))), sin(y))
In [13]:
def backward(final_node):
    from collections import defaultdict
    # construct a topological ordering of the nodes 
    # so we can process them in reverse order
    def topological_sort(node, visited, sorted_nodes):
        if node in visited: return
        visited.add(node)
        for parent, _ in node.parents:
            topological_sort(parent, visited, sorted_nodes)
        sorted_nodes.append(node)
    sorted_nodes = []
    topological_sort(final_node, set(), sorted_nodes)
    sorted_nodes.reverse()  # process from output to inputs
    
    # Compute the adjoints
    adjoint = defaultdict(float)
    adjoint[final_node] = 1.0  # d final_node / d final_node = 1
    for node in sorted_nodes:
        # Because we are going in reverse topological order, 
        # the adjoint of the current node is already computed
        for parent, grad in node.parents:
            adjoint[parent] += adjoint[node] * grad
    return adjoint
In [14]:
adjoints = backward(E)
adjoints[x1], adjoints[x2]
Out[14]:
(np.float64(16.7781121978613), np.float64(8.805202935477793))
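As a quick sanity check, we can compare the reverse-mode adjoints with the central-difference gradient from earlier at a few points (a minimal sketch; the test points and the helper names u, v, out are ours):

In [ ]:
# Sketch: the reverse-mode adjoints should match the numerical gradient.
for a, b in [(0.5, -1.0), (1.0, 2.0), (-0.3, 0.7)]:
    u, v = Scalar("x", a), Scalar("y", b)
    out = u * v + Scalar.exp(u * v) - Scalar.sin(v)
    adj = backward(out)
    print([adj[u], adj[v]], numeric_gradient(f, a, b))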

Don't Forget to Topologically Sort!

Here is a more "intuitive" implementation of backpropagation. It does not topologically sort the nodes, and therefore does not work in general.

In [15]:
def invalid_backward(final_node):
    from collections import defaultdict
    queue = [final_node]
    visited = set()
    adjoint = defaultdict(float)
    adjoint[final_node] = 1.0  # d final_node / d final_node = 1
    while queue:
        node = queue.pop()
        if node in visited: continue
        visited.add(node)
        print(f"Visiting {node.name}, adjoint={adjoint[node]}")
        for parent, grad in node.parents:
            print(f"   Propagating to {parent.name} with grad {grad}")
            # This is wrong because the adjoint may not be completely computed yet
            adjoint[parent] += grad * adjoint[node]
            if parent not in visited:
                queue.append(parent)
    return adjoint
In [16]:
adjoint = invalid_backward(E)
adjoint[x1], adjoint[x2]
Visiting (((x * y) + exp((x * y))) - sin(y)), adjoint=1.0
   Propagating to ((x * y) + exp((x * y))) with grad 1.0
   Propagating to sin(y) with grad -1.0
Visiting sin(y), adjoint=-1.0
   Propagating to y with grad -0.4161468365471424
Visiting y, adjoint=0.4161468365471424
Visiting ((x * y) + exp((x * y))), adjoint=1.0
   Propagating to (x * y) with grad 1.0
   Propagating to exp((x * y)) with grad 1.0
Visiting exp((x * y)), adjoint=1.0
   Propagating to (x * y) with grad 7.38905609893065
Visiting (x * y), adjoint=7.38905609893065
   Propagating to x with grad 2.0
   Propagating to y with grad 1.0
Visiting x, adjoint=14.7781121978613
Visiting (x * y), adjoint=1.0
   Propagating to x with grad 2.0
   Propagating to y with grad 1.0
Out[16]:
(np.float64(16.7781121978613), np.float64(8.805202935477793))
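Perhaps surprisingly, the result is still correct here. The reason: x1 * x2 is constructed twice (once directly and once inside Scalar.exp(x1 * x2)), so no intermediate node has more than one consumer. The only shared nodes are the inputs x and y, and since inputs have no parents, their adjoints are merely accumulated, never propagated prematurely. The next example shares an intermediate node between two paths, and the shortcut breaks.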
In [17]:
a = x1 * 2
b = 1/a 
c = Scalar.ln(b)
d = c + a
d
Out[17]:
Scalar(name='(ln((1 / (x * 2))) + (x * 2))', value=1.3068528194400546, parents=ln((1 / (x * 2))), (x * 2))
In [18]:
adjoints = backward(d)
adjoints[x1]
Out[18]:
1.0
In [19]:
adjoints = invalid_backward(d)
adjoints[x1]
Visiting (ln((1 / (x * 2))) + (x * 2)), adjoint=1.0
   Propagating to ln((1 / (x * 2))) with grad 1.0
   Propagating to (x * 2) with grad 1.0
Visiting (x * 2), adjoint=1.0
   Propagating to x with grad 2
Visiting x, adjoint=2.0
Visiting ln((1 / (x * 2))), adjoint=1.0
   Propagating to (1 / (x * 2)) with grad 2.0
Visiting (1 / (x * 2)), adjoint=2.0
   Propagating to (x * 2) with grad -0.25
Out[19]:
2.0
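The correct derivative is

$$ \frac{d}{dx}\left[\ln\frac{1}{2x} + 2x\right] = 2 - \frac{1}{x}, $$

which equals 1 at x = 1, as backward found. The flawed version pops (x * 2) while its adjoint holds only the partial value 1.0 from the addition; the -0.5 contribution from the ln branch arrives after the node has already been visited, so the incomplete adjoint is propagated to x as 1.0 * 2 = 2.0 and never corrected.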

Forward Autodiff Example

Bonus material: forward-mode automatic differentiation. Instead of recording a graph and sweeping backward, forward mode computes the gradient by propagating derivatives forward through the computation, alongside the values themselves.
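Concretely, each intermediate quantity v carries a tangent vector of its partial derivatives with respect to the inputs. The inputs are seeded with the unit tangents (1, 0) and (0, 1), and every operation updates the tangent via the chain rule, for example:

$$ v = u + w \;\Rightarrow\; \dot v = \dot u + \dot w, \qquad v = u\,w \;\Rightarrow\; \dot v = \dot u\, w + u\, \dot w, \qquad v = e^{u} \;\Rightarrow\; \dot v = e^{u}\, \dot u $$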

Implementing forward-mode autodiff by defining a custom class.

In [20]:
class ForwardVar:
    def __init__(self, value, grad, name=None):
        self.value = value
        self.name = name
        self.grad = grad

    def __add__(self, other):
        """Addition operator: self + other"""
        if not isinstance(other, (int, float, ForwardVar)):
            raise TypeError(f"Unsupported type for addition: {type(other)}")
        if isinstance(other, (int, float)):
            return ForwardVar(self.value + other, 
                              self.grad, 
                              name=f"({self.name} + {other})")
        else:
            value = self.value + other.value
            grad = np.zeros_like(self.grad)
            for i in range(len(self.grad)):
                grad[i] = self.grad[i] + other.grad[i]
            return ForwardVar(value, grad, name=f"({self.name} + {other.name})")
        
    def __radd__(self, other):
        return self.__add__(other)
    
    def __sub__(self, other):
        """Subtraction operator: self - other"""
        if not isinstance(other, (int, float, ForwardVar)):
            raise TypeError(f"Unsupported type for subtraction: {type(other)}")

        if isinstance(other, (int, float)):
            return ForwardVar(self.value - other, 
                              self.grad, 
                              name=f"({self.name} - {other})")
        else:
            value = self.value - other.value
            grad = np.zeros_like(self.grad)
            for i in range(len(self.grad)):
                grad[i] = self.grad[i] - other.grad[i]
            return ForwardVar(value, grad, name=f"({self.name} - {other.name})")
    
    def __rsub__(self, other):
        """Right subtraction operator: other - self"""
        if not isinstance(other, (int, float)):
            raise TypeError(f"Unsupported type for subtraction: {type(other)}")
        return ForwardVar(other - self.value, 
                            -self.grad, 
                            name=f"({other} - {self.name})")
    
    def __mul__(self, other):
        """Multiplication operator: self * other"""
        if not isinstance(other, (int, float, ForwardVar)):
            raise TypeError(f"Unsupported type for multiplication: {type(other)}")
        if isinstance(other, (int, float)):
            return ForwardVar(self.value * other, 
                              self.grad * other, 
                              name=f"({self.name} * {other})")
        else:
            value = self.value * other.value
            grad = np.zeros_like(self.grad)
            for i in range(len(self.grad)):
                grad[i] = self.grad[i] * other.value + self.value * other.grad[i]
            return ForwardVar(value, grad, name=f"({self.name} * {other.name})")
        
    def __rmul__(self, other):
        """Right multiplication operator: other * self"""
        return self.__mul__(other)
    
    def __truediv__(self, other):
        """Division operator: self / other"""
        if not isinstance(other, (int, float, ForwardVar)):
            raise TypeError(f"Unsupported type for division: {type(other)}")
        if isinstance(other, (int, float)):
            if other == 0:
                raise ValueError("Division by zero")
            return ForwardVar(self.value / other, 
                              self.grad / other, 
                              name=f"({self.name} / {other})")
        else:
            if other.value == 0:
                raise ValueError("Division by zero")
            # quotient rule: (u/v)' = u'/v - (u/v^2)*v'
            value = self.value / other.value
            grad = np.zeros_like(self.grad)
            for i in range(len(self.grad)):
                grad[i] = self.grad[i] / other.value - (self.value / (other.value**2)) * other.grad[i]
            return ForwardVar(value, grad, name=f"({self.name} / {other.name})")
    
    def __rtruediv__(self, other):
        """Right division operator: other / self"""
        if not isinstance(other, (int, float)):
            raise TypeError(f"Unsupported type for right division: {type(other)}")
        if self.value == 0:
            raise ValueError("Division by zero")
        # For c/f(x), derivative is -c*f'(x)/f(x)²
        value = other / self.value
        grad = np.zeros_like(self.grad)
        for i in range(len(self.grad)):
            grad[i] = -other * self.grad[i] / (self.value ** 2)
        return ForwardVar(value, grad, name=f"({other} / {self.name})")
    
    def __neg__(self):
        """Unary negative operator: -self"""
        return ForwardVar(-self.value, 
                          -self.grad, 
                          name=f"(-{self.name})")
    
    def __repr__(self):
        return self.__str__()
    # adding print support
    def __str__(self):
        return f"ForwardVar(\n\tvalue={self.value}, \n\tgrad={self.grad}, \n\tname='{self.name}')"

def forward_exp(x : ForwardVar):
    value = np.exp(x.value)
    grad = np.zeros_like(x.grad)
    for i in range(len(x.grad)):
        grad[i] = x.grad[i] * value
    return ForwardVar(value, grad, name=f"exp({x.name})")

def forward_sin(x : ForwardVar):
    value = np.sin(x.value)
    grad = np.zeros_like(x.grad)
    for i in range(len(x.grad)):
        grad[i] = x.grad[i] * np.cos(x.value)
    return ForwardVar(value, grad, name=f"sin({x.name})")
In [21]:
v1 = ForwardVar(1, np.array([1., 0.]), name="x1")
v2 = ForwardVar(2, np.array([0., 1.]), name="x2")


v1 * v2 + forward_exp(v1 * v2) - forward_sin(v2)
Out[21]:
ForwardVar(
	value=8.47975867210497, 
	grad=[16.7781122   8.80520294], 
	name='(((x1 * x2) + exp((x1 * x2))) - sin(x2))')
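Note the cost tradeoff: each ForwardVar carries one tangent entry per input, so computing a full gradient of a function of n inputs requires n tangent directions, while reverse mode recovers all input adjoints in a single backward pass. A single seed yields one directional derivative (a quick sketch; the names w1 and w2 are ours):

In [ ]:
# Sketch: seeding x1 with tangent 1 and x2 with tangent 0 computes
# only the single partial df/dx1 in one forward pass.
w1 = ForwardVar(1, np.array([1.]), name="x1")
w2 = ForwardVar(2, np.array([0.]), name="x2")
w1 * w2 + forward_exp(w1 * w2) - forward_sin(w2)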