Cross Entropy Loss¶

In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:85% !important; }</style>"))

1. Measuring performance of a model¶

In [26]:
import torch
import torch.nn.functional as F
from torch import tensor

(Image: ./images/classification-loss.jpg)

In the lecture The spelled-out intro to language modeling: building makemore, Karpathy says that:

  • In the literature on maximum likelihood estimation and statistical modeling, what's typically used is something called the likelihood, and the likelihood is the product of all of these probabilities.
  • The product of these probabilities tells us the probability that the trained model assigns to the entire data set, so it is a measure of the model's quality.
  • This product should be as high as possible: a good model assigns a high probability to the data it was trained on.
  • Because each of these probabilities is between zero and one, their product is an unwieldy thing to work with.

So the product of these probabilities will be a very tiny number. For convenience, people usually work not with the likelihood but with the log likelihood.
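To make this concrete, here is a small made-up example (the probabilities are purely illustrative, not from the lecture) comparing the raw product of per-example probabilities with the sum of their logs:

In [ ]:
# 1000 hypothetical per-example probabilities, each 0.1 (purely illustrative)
p = torch.full((1000,), 0.1)

likelihood = p.prod()                # product of many values in (0, 1) underflows to 0 in float32
log_likelihood = torch.log(p).sum()  # sum of log-probabilities stays a workable number (≈ -2302.6)

likelihood, log_likelihood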

But why do we need the negative log likelihood? I asked Claude for help. This is what I got:

The negative sign in the cross-entropy loss formula serves an important purpose.

  1. Likelihood and probability:

The likelihood of an event is typically expressed as a probability between 0 and 1. For example, if a model predicts an event with 80% confidence, the likelihood is 0.8.

  2. Properties of logarithms:

The logarithm of a number between 0 and 1 is always negative. For instance:

log(0.8) ≈ -0.223
log(0.5) ≈ -0.693
log(0.1) ≈ -2.303

  3. Goal of loss functions:

In machine learning, we typically want to minimize the loss function. Lower loss indicates better model performance.

  4. Inverting the scale:

By adding the negative sign, we invert the scale of the logarithm. This accomplishes two things: (a) it makes the loss positive, which is more intuitive, and (b) it aligns the loss with our goal of minimization.

Here's how it works:

  • For high likelihood predictions (close to 1), the negative log likelihood will be a small positive number.
  • For low likelihood predictions (close to 0), the negative log likelihood will be a large positive number.

This way, minimizing the loss corresponds to maximizing the likelihood of correct predictions.

To illustrate:

  • If prediction = 0.9 (good), -log(0.9) ≈ 0.105 (small loss)
  • If prediction = 0.1 (poor), -log(0.1) ≈ 2.303 (large loss)

Without the negative sign, we'd have to maximize the loss to improve the model, which is counterintuitive.
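A quick check of those numbers with torch (natural log, as used throughout this notebook):

In [ ]:
# Negative log likelihood of a confident vs. a poor prediction
-torch.log(torch.tensor(0.9)), -torch.log(torch.tensor(0.1))   # ≈ (0.1054, 2.3026)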

In [27]:
import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 6))
x = np.linspace(0.01, 1, 1000)
y = np.log(x)
ax.plot(x, y)

# Set labels and title
ax.set_xlabel('x')
ax.set_ylabel('log(x)')
ax.set_title('For x between 0 and 1, log(x) is negative')
ax.grid(True, linestyle='--', alpha=0.7)
ax.set_xlim(0, 1)
ax.axvline(x=0, color='r', linestyle='--', label='x = 0 (log(x) → -inf)')
ax.legend()
plt.show()
(Plot: log(x) for x in (0, 1], which is negative everywhere on that interval)

2. Calculating loss manually¶

In [28]:
# We have 4 samples and 3 classes
# "logits" is the output of the model before applying softmax
logits = torch.tensor([
    [1.5, 0.1, -0.4],
    [0.5, 0.7, 2.1],
    [-2.1, 1.1, 0.8],
    [1.1, 2.5, -1.2]
])
In [29]:
# Softmax normalizes the logits
probs = torch.softmax(logits, dim=1)
probs
Out[29]:
tensor([[0.7162, 0.1766, 0.1071],
        [0.1394, 0.1702, 0.6904],
        [0.0229, 0.5613, 0.4158],
        [0.1940, 0.7866, 0.0194]])
In [30]:
# Sum of the row elements is 1
probs.sum(dim=1)
Out[30]:
tensor([1.0000, 1.0000, 1.0000, 1.0000])

True Labels¶

In [31]:
y = torch.tensor([0, 2, 2, 1])
y_onehot = F.one_hot(y, num_classes=3)

print(y_onehot)
tensor([[1, 0, 0],
        [0, 0, 1],
        [0, 0, 1],
        [0, 1, 0]])

Cross Entropy Loss¶

(Image: ./images/cross-entropy-loss.png)
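For reference, written out for $N$ examples and $K$ classes, with one-hot targets $y_{ik}$ and predicted probabilities $p_{ik}$, the average cross-entropy loss is

$$L = -\frac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{K} y_{ik}\,\log(p_{ik})$$

Since $y_{ik}$ is 1 only for the true class of example $i$, each inner sum reduces to $-\log$ of the probability the model assigns to the true class. That is exactly what the cells below compute, one training example at a time.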

In [32]:
### Training example #1
probs[0], y_onehot[0]
Out[32]:
(tensor([0.7162, 0.1766, 0.1071]), tensor([1, 0, 0]))
In [33]:
(-1 * torch.log(tensor(0.7162))) + (-0 * torch.log(tensor(0.1766))) + (-0 * torch.log(tensor(0.1071)))
Out[33]:
tensor(0.3338)
In [34]:
### Training example #2
probs[1], y_onehot[1]
Out[34]:
(tensor([0.1394, 0.1702, 0.6904]), tensor([0, 0, 1]))
In [35]:
0 + 0 + (-1 * torch.log(tensor(0.6904)))
Out[35]:
tensor(0.3705)
In [36]:
### Training example #3
probs[2], y_onehot[2]
Out[36]:
(tensor([0.0229, 0.5613, 0.4158]), tensor([0, 0, 1]))
In [37]:
0 + 0 + (-1 * torch.log(tensor(0.4158)))
Out[37]:
tensor(0.8776)
In [38]:
### Training example #4
probs[3], y_onehot[3]
Out[38]:
(tensor([0.1940, 0.7866, 0.0194]), tensor([0, 1, 0]))
In [39]:
0 + (-1 * torch.log(tensor(0.7866))) + 0
Out[39]:
tensor(0.2400)
In [40]:
# Cross Entropy Loss
L = (0.3338 + 0.3705 + 0.8776 + 0.2400) / 4
print(L)
0.455475
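Because the one-hot target zeroes out every term except the true class, the same per-example losses can also be read off directly by indexing probs with the label indices:

In [ ]:
# Probability of the true class for each example, then average the negative logs
per_example_nll = -torch.log(probs[torch.arange(len(y)), y])
per_example_nll, per_example_nll.mean()   # mean ≈ 0.4555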

3. Calculating loss using Python¶

In [41]:
def loss_using_python():
    probs = torch.softmax(logits, dim=1)
    n, no_classes = probs.shape
    loss = 0

    for i in range(n):
        # One-hot encode the true label of example i
        true_label_index = y[i].item()
        one_hot = F.one_hot(torch.tensor(true_label_index), no_classes)

        # Sum y_k * (-log p_k) over the classes; only the true class contributes
        example_loss = 0
        for k in range(no_classes):
            neg_log_likelihood = -torch.log(probs[i][k])
            example_loss += one_hot[k] * neg_log_likelihood

        loss += example_loss

    # Average over the n training examples
    print(f"Loss: {loss / n}")


loss_using_python()
Loss: 0.45545268058776855

4. Manually calculating in PyTorch¶

In [42]:
def manual_cross_entropy(logits, y):
    """
    logits: output of the model before applying softmax, shape (n, num_classes)
    y: true class indices, shape (n,)
    """
    probs = F.softmax(logits, dim=1)
    y_onehot = F.one_hot(y, num_classes=logits.shape[1])
    # Per-example loss: -sum_k y_k * log(p_k); only the true class contributes
    train_loss = -torch.sum(y_onehot * torch.log(probs), dim=1)
    avg_loss = torch.mean(train_loss)
    return avg_loss
In [43]:
manual_cross_entropy(logits, y)
Out[43]:
tensor(0.4555)
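A note on numerical stability: taking torch.log(softmax(...)) directly can misbehave for extreme logits. F.log_softmax computes the same quantity in a more stable way, and F.nll_loss applied to those log-probabilities matches the manual result:

In [ ]:
# Numerically stabler equivalent of the manual computation above
log_probs = F.log_softmax(logits, dim=1)
F.nll_loss(log_probs, y)   # ≈ tensor(0.4555)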

5. Using PyTorch's CrossEntropyLoss¶

  • We don't need to apply the softmax activation ourselves; cross_entropy works directly on the logits.
  • We don't need to convert the true labels to one-hot encoding; it takes the class indices.
In [44]:
import torch
import torch.nn.functional as F
In [45]:
# We have 4 samples and 3 classes
# "logits" is the output of the model before applying softmax
logits = torch.tensor([
    [1.5, 0.1, -0.4],
    [0.5, 0.7, 2.1],
    [-2.1, 1.1, 0.8],
    [1.1, 2.5, -1.2]
])
In [46]:
y = torch.tensor([0, 2, 2, 1])
# One-hot encoding shown only for comparison; F.cross_entropy takes the class indices in y directly
y_onehot = F.one_hot(y, num_classes=3)
print(y_onehot)
tensor([[1, 0, 0],
        [0, 0, 1],
        [0, 0, 1],
        [0, 1, 0]])
In [47]:
loss = F.cross_entropy(logits, y)
loss
Out[47]:
tensor(0.4555)
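The same loss is also available in module form, which is what you typically pass around in a training loop:

In [ ]:
# Module form; like F.cross_entropy it expects raw logits and class indices
criterion = torch.nn.CrossEntropyLoss()
criterion(logits, y)   # ≈ tensor(0.4555)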