Imagine we want to build a simple AI that predicts a student’s test score (y) based on how many hours they studied (x).
We will assume a simple linear relationship passing through zero: $y = w \cdot x$, where $w$ is the single weight the model has to learn.
import torch

# Step-by-step walkthrough of ONE gradient-descent update for the model
# y = w * x, using the whole dataset at once.

# Set up: hours studied (X), matching test scores (Y), and the weight to learn.
X = torch.tensor([1.0, 2.0, 3.0])
Y = torch.tensor([2.0, 4.0, 6.0])
w = torch.tensor(1.0, requires_grad=True)
print(X, Y, w)
learning_rate = 0.1
# Output:
# tensor([1., 2., 3.]) tensor([2., 4., 6.]) tensor(1., requires_grad=True)

# compute prediction
y_pred = w * X
print(y_pred)
# Output:
# tensor([1., 2., 3.], grad_fn=<MulBackward0>)

# compute loss
# backward() needs a scalar, so the per-sample squared errors are averaged.
loss = ((y_pred - Y) ** 2).mean()
print(loss)
# Output:
# tensor(4.6667, grad_fn=<MeanBackward0>)

# w.grad is None until the first backward pass, so only clear it if it exists.
if w.grad is not None:
    w.grad.zero_()
loss.backward()
print(w.grad.item())
# Output (approximately):
# -9.3333

# Update the weight outside of autograd tracking.
with torch.no_grad():
    w -= learning_rate * w.grad
print(w.item())
# Output (approximately):
# 1.9333

# Prediction with the updated weight.
y_pred = w * X
print(y_pred)
# Output:
# tensor([1.9333, 3.8667, 5.8000], grad_fn=<MulBackward0>)

# Put everything together
# Two-feature linear model y = X @ W trained with three steps of gradient
# descent on a mean-squared-error loss.

# Set up
X = torch.tensor([[1.0, 1.2], [2.0, 1.2]])
# X displays as:
# tensor([[1.0000, 1.2000],
#         [2.0000, 1.2000]])
Y = torch.tensor([[2.0], [4.0]])
W = torch.tensor([[1.0], [1.0]], requires_grad=True)
# b = torch.tensor(0.0, requires_grad=True)
learning_rate = 0.1
print(W)

for i in range(3):
    # forward pass
    y_pred = X @ W
    # loss
    loss = ((y_pred - Y) ** 2).mean()
    # backward pass
    loss.backward()
    # update weight (outside autograd so the update itself is not tracked)
    with torch.no_grad():
        W -= learning_rate * W.grad
    print(f"Gradient: {W.grad}")
    # reset gradient so the next backward pass does not accumulate onto it
    W.grad.zero_()
    # b.grad.zero_()
    print(f"parameter: w={W}")
    # y_pred was computed before the update, i.e. with the previous W
    print(f"Prediction: {y_pred}")

# Final prediction with the trained weights. This must be the matrix product:
# W * X would broadcast element-wise to a 2x2 tensor, not a (2, 1) prediction.
final_y = X @ W
print(final_y)
# Output:
# tensor([[1.],
#         [1.]], requires_grad=True)
# Gradient: tensor([[-1.4000],
#         [-0.7200]])
# parameter: w=tensor([[1.1400],
#         [1.0720]], requires_grad=True)
# Prediction: tensor([[2.2000],
#         [3.2000]], grad_fn=<MmBackward0>)
# Gradient: tensor([[-0.4408],
#         [-0.0086]])
# parameter: w=tensor([[1.1841],
#         [1.0729]], requires_grad=True)
# Prediction: tensor([[2.4264],
#         [3.5664]], grad_fn=<MmBackward0>)
# Gradient: tensor([[-0.2173],
#         [ 0.1525]])
# parameter: w=tensor([[1.2058],
#         [1.0576]], requires_grad=True)
# Prediction: tensor([[2.4715],
#         [3.6556]], grad_fn=<MmBackward0>)
# Final prediction (approximately):
# tensor([[2.4749],
#         [3.6808]], grad_fn=<MmBackward0>)
# Inspect the current data matrix and weights.
# NOTE(review): the recorded values below do not match the 2x2 X and the
# trained W produced earlier in this file; presumably they come from a
# different notebook session - TODO confirm.
print(X)
# Recorded output:
# tensor([[1.0000, 2.0000, 3.0000],
#         [1.2000, 1.2000, 1.3000]])
print(W)
# Recorded output:
# tensor([[1.1667],
#         [1.1820]], requires_grad=True)
import torch
# One gradient-descent step for y = w * x on a single (x, y) example.

# 1. Setup
x = torch.tensor(2.0)
y = torch.tensor(4.0)
w = torch.tensor(1.0, requires_grad=True)
learning_rate = 0.1
print(f"Initial Prediction: {(w * x).item():.1f}")  # Output: 2.0

# 2. Forward & Loss
y_pred = w * x
loss = (y_pred - y) ** 2

# 3. Backward
loss.backward()
print(f"Gradient: {w.grad.item():.1f}")  # Output: -8.0

# 4. Update (no_grad keeps the in-place update out of the autograd graph)
with torch.no_grad():
    w -= learning_rate * w.grad
print(f"New Weight: {w.item():.1f}")  # Output: 1.8
print(f"New Prediction: {(w * x).item():.1f}")  # Output: 3.6
# Output:
# Initial Prediction: 2.0
# Gradient: -8.0
# New Weight: 1.8
# New Prediction: 3.6
import torch
# Three gradient-descent iterations for y = w * x on a single (x, y) example,
# printing one table row per iteration.

# 1. Setup
x = torch.tensor(2.0)
y = torch.tensor(4.0)
w = torch.tensor(1.0, requires_grad=True)
learning_rate = 0.1
print(f"{'Iter':<5} | {'w (Start)':<10} | {'Pred':<10} | {'Grad':<10} | {'w (End)':<10}")
print("-" * 55)

# LOOP FOR 3 ITERATIONS
for i in range(3):
    # A. Forward Pass
    y_pred = w * x
    # B. Compute Loss
    loss = (y_pred - y) ** 2
    # C. Backward Pass (Calculate Gradient)
    loss.backward()
    # Capture current state for printing (before the update changes w)
    current_w = w.item()
    current_grad = w.grad.item()
    current_pred = y_pred.item()
    # D. Update Weight
    with torch.no_grad():
        w -= learning_rate * w.grad
    # E. IMPORTANT: Zero the gradient!
    # If we forget this, the gradient in Iter 2 becomes (-8 + -1.6) = -9.6!
    w.grad.zero_()
    print(f"{i+1:<5} | {current_w:<10.3f} | {current_pred:<10.3f} | {current_grad:<10.3f} | {w.item():<10.3f}")

print(f"\nFinal Prediction: {(w * x).item():.3f} (Target: 4.000)")
# Output:
# Iter  | w (Start)  | Pred       | Grad       | w (End)
# -------------------------------------------------------
# 1     | 1.000      | 2.000      | -8.000     | 1.800
# 2     | 1.800      | 3.600      | -1.600     | 1.960
# 3     | 1.960      | 3.920      | -0.320     | 1.992
#
# Final Prediction: 3.984 (Target: 4.000)
import torch
# Three epochs of full-batch gradient descent for y = w * X with an MSE loss,
# printing one table row per epoch.

# 1. Dataset (Batch)
X = torch.tensor([1.0, 2.0, 3.0])
Y = torch.tensor([2.0, 4.0, 6.0])

# 2. Initialize
w = torch.tensor(1.0, requires_grad=True)
learning_rate = 0.1
print(f"{'Epoch':<5} | {'w (Start)':<10} | {'Avg Grad':<10} | {'w (End)':<10} | {'Loss (MSE)':<10}")
print("-" * 65)

# 3. Training Loop
for epoch in range(3):
    # A. Forward Pass (Predict all 3 at once)
    y_pred = w * X
    # B. Loss (Mean Squared Error)
    # This automatically sums the squared errors and divides by 3
    loss = ((y_pred - Y) ** 2).mean()
    # C. Backward Pass (Compute Avg Gradient)
    loss.backward()
    # Save values for printing (before the update changes w)
    current_w = w.item()
    grad = w.grad.item()
    # D. Update
    with torch.no_grad():
        w -= learning_rate * w.grad
    # E. Zero Gradient (otherwise the next backward pass accumulates onto it)
    w.grad.zero_()
    print(f"{epoch+1:<5} | {current_w:<10.3f} | {grad:<10.3f} | {w.item():<10.3f} | {loss.item():<10.4f}")

print(f"\nFinal Weight: {w.item():.4f} (Target: 2.0)")
# Output:
# Epoch | w (Start)  | Avg Grad   | w (End)    | Loss (MSE)
# -----------------------------------------------------------------
# 1     | 1.000      | -9.333     | 1.933      | 4.6667
# 2     | 1.933      | -0.622     | 1.996      | 0.0207
# 3     | 1.996      | -0.041     | 2.000      | 0.0001
#
# Final Weight: 1.9997 (Target: 2.0)