CIDAUT AI Fake Scene Classification 2024
Description:
Your task is to develop a neural network or apply another suitable algorithm to classify whether an image of a driving scenario is real or fake. The images are provided in RGB format and compressed as JPEG files. Each image is labeled 1 for real and 0 for fake, making this a binary classification problem. You are free to create your own train-validation split for model training and evaluation. However, labels are not available for the test images; refer to the sample_submission.csv file in the Data section for the submission format. The code must be written in Python, and you can use frameworks such as TensorFlow, Keras, or PyTorch. You may also leverage public GitHub repositories, pre-trained models, and other publicly available datasets to enhance your solution.
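For orientation, here is a minimal sketch for inspecting the labels and the expected submission layout (assuming train.csv and sample_submission.csv as described above; the column positions follow the training and prediction scripts below):

import pandas as pd

# Peek at the training labels and the class balance
labels = pd.read_csv("train.csv")
print(labels.head())
print(labels.iloc[:, 1].value_counts())

# The submission must mirror sample_submission.csv: one row per test image
sample = pd.read_csv("sample_submission.csv")
print(sample.head())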
Training the model:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, Subset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold
import pandas as pd
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
from transformers import AutoModelForImageClassification
# Focal loss implementation: down-weights easy, well-classified examples relative to plain BCE
class FocalLoss(nn.Module):
def __init__(self, alpha=0.25, gamma=2):
super(FocalLoss, self).__init__()
self.alpha = alpha
self.gamma = gamma
def forward(self, inputs, targets):
BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
pt = torch.exp(-BCE_loss)
F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss
return torch.mean(F_loss)
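A quick sanity check (not part of the training pipeline): with gamma=0 the modulating factor (1-pt)**gamma disappears, so the focal loss reduces to alpha-scaled BCE.

# Sanity check: gamma=0 makes FocalLoss equal to alpha * BCEWithLogits
logits = torch.randn(8, 1)
targets = torch.randint(0, 2, (8, 1)).float()
fl = FocalLoss(alpha=0.25, gamma=0)(logits, targets)
bce = F.binary_cross_entropy_with_logits(logits, targets)
assert torch.allclose(fl, 0.25 * bce)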
# Custom dataset
class CustomImageDataset(Dataset):
def __init__(self, csv_file, img_dir, transform=None):
self.annotations = csv_file
self.img_dir = img_dir
self.transform = transform
def __len__(self):
return len(self.annotations)
def __getitem__(self, idx):
img_path = os.path.join(self.img_dir, self.annotations.iloc[idx, 0])
image = Image.open(img_path).convert("RGB")
label = torch.tensor(int(self.annotations.iloc[idx, 1]))
if self.transform:
image = self.transform(image)
return image, label
# Training-time data augmentation transformations
def get_transform(img_size=(512, 512)):
return transforms.Compose([
transforms.Resize(img_size),
transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),
transforms.RandomRotation(15),
transforms.ColorJitter(brightness=0.2, contrast=0.2),
transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
transforms.ToTensor(),
])
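A quick way to verify the dataset and transform together (a hedged sketch, assuming the train.csv and Train directory used in the main block below, with the filename in the first column as the dataset class expects):

# Sanity check: one sample should come out as a 3x512x512 float tensor
labels = pd.read_csv("train.csv")
labels["label"] = labels["label"].map({"editada": 0, "real": 1})
ds = CustomImageDataset(csv_file=labels, img_dir="Train", transform=get_transform())
image, label = ds[0]
print(image.shape, label)  # expected: torch.Size([3, 512, 512]) and a 0/1 tensor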
# Create data loaders (augmentation for training only; validation is just resized)
def create_dataloaders(csv_file, img_dir, img_size=(512, 512), batch_size=32, n_fold=0):
    train_transform = get_transform(img_size)
    val_transform = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ToTensor(),
    ])
    train_base = CustomImageDataset(csv_file=csv_file, img_dir=img_dir, transform=train_transform)
    val_base = CustomImageDataset(csv_file=csv_file, img_dir=img_dir, transform=val_transform)
    # Stratified 5-fold split; keep the indices of the fold selected by n_fold
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    for i, (train_index, val_index) in enumerate(skf.split(np.zeros(len(csv_file)), csv_file.iloc[:, 1].values)):
        if i == n_fold:
            break
    train_dataset = Subset(train_base, train_index)
    val_dataset = Subset(val_base, val_index)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
    return train_loader, val_loader
# Single epoch training
def train_one_epoch(model, train_loader, criterion, optimizer, device):
model.train()
running_loss = 0.0
for images, labels in tqdm(train_loader, desc="Training"):
images, labels = images.to(device), labels.to(device).float().unsqueeze(1)
optimizer.zero_grad()
        outputs = model(images).logits[:, :1]  # keep only the first logit of the pretrained head as a binary score
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item() * images.size(0)
return running_loss / len(train_loader.dataset)
# Validation
def validate(model, val_loader, criterion, device):
model.eval()
running_loss = 0.0
all_labels = []
all_outputs = []
with torch.no_grad():
for images, labels in tqdm(val_loader, desc="Validation"):
images, labels = images.to(device), labels.to(device).float().unsqueeze(1)
            outputs = model(images).logits[:, :1]  # first logit as binary score, matching training
loss = criterion(outputs, labels)
running_loss += loss.item() * images.size(0)
all_labels.append(labels.cpu().numpy())
all_outputs.append(outputs.cpu().numpy())
epoch_loss = running_loss / len(val_loader.dataset)
all_labels = np.concatenate(all_labels)
all_outputs = np.concatenate(all_outputs)
all_outputs = torch.sigmoid(torch.tensor(all_outputs)).numpy()
return epoch_loss, all_labels, all_outputs
# Early stopping
class EarlyStopping:
def __init__(self, patience, verbose=False, delta=0):
self.patience = patience
self.verbose = verbose
self.counter = 0
self.best_score = None
self.early_stop = False
        self.val_loss_min = np.inf  # np.Inf was removed in NumPy 2.0
self.delta = delta
def __call__(self, val_loss, model, path):
score = -val_loss
if self.best_score is None:
self.best_score = score
self.save_checkpoint(val_loss, model, path)
elif score < self.best_score + self.delta:
self.counter += 1
print(f'Early stopping count: {self.counter} / {self.patience}')
if self.counter >= self.patience:
self.early_stop = True
else:
self.best_score = score
self.save_checkpoint(val_loss, model, path)
self.counter = 0
def save_checkpoint(self, val_loss, model, path):
if self.verbose:
print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model...')
torch.save(model.state_dict(), os.path.join(path, 'checkpoint1.pth'))
self.val_loss_min = val_loss
# Main training function
def train_model(csv_file, img_dir, model, model_name, img_size=(512, 512),
num_epochs=100, batch_size=2, lr=1e-5, n_fold=0,
device='cuda', patience=10):
train_loader, val_loader = create_dataloaders(csv_file, img_dir,
img_size=img_size,
batch_size=batch_size,
n_fold=n_fold)
model = model.to(device)
    # Class weights (computed for reference only; the focal loss below uses a
    # fixed alpha, so pos_weight is never actually consumed)
    class_counts = csv_file['label'].value_counts()
    total_samples = len(csv_file)
    class_weights = total_samples / (2 * class_counts)
    pos_weight = torch.tensor(class_weights[1]).to(device)
    # Loss function
    criterion = FocalLoss()
# Optimizer
optimizer = optim.AdamW(model.parameters(),
lr=lr,
weight_decay=1e-5) # L2 regularization
# Learning rate scheduler
scheduler = ReduceLROnPlateau(optimizer,
mode='min',
factor=0.5,
patience=3,
min_lr=1e-6,
verbose=True)
early_stopping = EarlyStopping(patience=patience, verbose=True)
train_losses, val_losses = [], []
path = f'{model_name}{n_fold}'
os.makedirs(path, exist_ok=True)
for epoch in range(num_epochs):
print(f'Epoch {epoch+1}/{num_epochs}')
train_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
val_loss, val_labels, val_outputs = validate(model, val_loader, criterion, device)
train_losses.append(train_loss)
val_losses.append(val_loss)
# Performance metrics
val_preds = (val_outputs > 0.5).astype(int)
accuracy = accuracy_score(val_labels, val_preds)
f1 = f1_score(val_labels, val_preds)
roc_auc = roc_auc_score(val_labels, val_outputs)
print(f'Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')
print(f'Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}, ROC AUC: {roc_auc:.4f}')
# Learning rate adjustment
scheduler.step(val_loss)
# Early stopping
early_stopping(val_loss, model, path)
if early_stopping.early_stop:
print("Early stopping triggered")
break
# Plot loss curve
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='train loss')
plt.plot(val_losses, label='val loss')
plt.title('Model training loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.savefig(os.path.join(path, 'loss_plot.png'))
plt.close()
return model
# Main execution
if __name__ == '__main__':
    # Load the pretrained model (preprocessing is handled by the torchvision
    # transforms above, so the HF image processor is not needed)
    model = AutoModelForImageClassification.from_pretrained("microsoft/beit-large-patch16-512")
# Load labels
labels = pd.read_csv("train.csv")
labels["label"] = labels["label"].map({"editada": 0, "real": 1})
img_dir = "Train"
# Training parameters
batch_size = 2
lr = 1e-5
img_size = (512, 512)
n_fold = 0
# Start training
trained_model = train_model(
labels,
img_dir,
model,
'microsoft/beit-large-patch16-512',
img_size=img_size,
num_epochs=100,
batch_size=batch_size,
lr=lr,
n_fold=n_fold,
patience=10
)
# Clear GPU memory
torch.cuda.empty_cache()
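Since create_dataloaders exposes n_fold, training all five folds is a small extension (a sketch; the scripts here train and predict with fold 0 only, and each fold saves its checkpoint to its own f'{model_name}{n_fold}' directory):

for fold in range(5):
    # Fresh pretrained weights per fold, since train_model fine-tunes in place
    model = AutoModelForImageClassification.from_pretrained("microsoft/beit-large-patch16-512")
    train_model(labels, img_dir, model, 'microsoft/beit-large-patch16-512',
                img_size=img_size, num_epochs=100, batch_size=batch_size,
                lr=lr, n_fold=fold, patience=10)
    torch.cuda.empty_cache()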
Make predictions:
import torch
import pandas as pd
import os
from PIL import Image
import torchvision.transforms as transforms
from transformers import AutoModelForImageClassification
def create_submission(model_path, test_dir, output_csv='submission.csv'):
# Check if the device supports GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device in use: {device}")
    # Recreate the validation-time preprocessing: training used only
    # Resize + ToTensor (no normalization), so the HF image processor
    # (which would also normalize) is deliberately not used here
    transform = transforms.Compose([
        transforms.Resize((512, 512)),
        transforms.ToTensor(),
    ])
    # Load the model architecture
    model = AutoModelForImageClassification.from_pretrained("microsoft/beit-large-patch16-512")
# Load the trained weights
    model.load_state_dict(torch.load(os.path.join(model_path, 'checkpoint1.pth'), map_location=device))
model = model.to(device) # Move model to GPU
model.eval() # Set model to evaluation mode
# Prepare a list to store prediction results
predictions = []
# Iterate over images in the test directory
for filename in sorted(os.listdir(test_dir)):
if filename.endswith(('.jpg', '.png', '.jpeg')):
# Full image path
image_path = os.path.join(test_dir, filename)
# Open and convert the image
image = Image.open(image_path).convert("RGB")
            # Preprocess exactly as in validation (resize + ToTensor)
            inputs = transform(image).unsqueeze(0).to(device)
            # Make the prediction
            with torch.no_grad():
                outputs = model(pixel_values=inputs)
            # First logit as binary score, matching training
            probability = torch.sigmoid(outputs.logits[:, :1]).item()
            # Convert the probability to a class (1 for real, 0 for fake)
            predicted_class = 1 if probability > 0.5 else 0
# Add to prediction results
predictions.append({'image': filename, 'label': predicted_class})
# Create a DataFrame
submission_df = pd.DataFrame(predictions)
# Save as CSV
submission_df.to_csv(output_csv, index=False)
print(f"Submission CSV saved to {output_csv}")
print("Preview of prediction results:")
print(submission_df)
# Example usage
model_path = 'microsoft/beit-large-patch16-5120'  # f'{model_name}{n_fold}' directory created during training (model name + fold 0)
test_dir = 'Test' # Folder containing test images
create_submission(model_path, test_dir)