import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Sample data - replace this with your dataset
courses_data = [
    {"sequence": "python programming", "label": "programming"},
    {"sequence": "web development with HTML and CSS", "label": "web development"},
    # Add more data...
]

# Preprocess the data
sequences = [d["sequence"] for d in courses_data]
labels = [d["label"] for d in courses_data]

# Create a mapping from labels to unique indices
label2index = {label: idx for idx, label in enumerate(set(labels))}
index2label = {idx: label for label, idx in label2index.items()}

# Convert labels to indices
label_indices = [label2index[label] for label in labels]

# Split the data into training and testing sets
train_sequences, test_sequences, train_labels, test_labels = train_test_split(
    sequences, label_indices, test_size=0.2, random_state=42
)


# Define a simple RNN model
class RNNCourseRecommendationModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = self.embedding(x)           # (batch, seq_len, embedding_dim)
        _, hn = self.rnn(x)             # hn: (num_layers, batch, hidden_size)
        return self.fc(hn[-1, :, :])    # classify from the final hidden state


# Define a simple Transformer model. An encoder-only stack is used here:
# nn.Transformer expects both src and tgt tensors, which does not fit a
# single-input classification task.
class TransformerCourseRecommendationModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_heads, num_layers, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embedding_dim, nhead=num_heads, batch_first=True
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embedding_dim, num_classes)

    def forward(self, x):
        x = self.embedding(x)    # (batch, seq_len, embedding_dim)
        x = self.encoder(x)      # contextualized token representations
        x = x.mean(dim=1)        # aggregate over the sequence dimension
        return self.fc(x)


# Hyperparameters
vocab_size = 10000  # Replace with the actual vocabulary size
embedding_dim = 50
hidden_size = 64    # RNN only
num_heads = 5       # Transformer only; must evenly divide embedding_dim
num_layers = 2      # Transformer only
num_classes = len(set(labels))
batch_size = 32
learning_rate = 0.001
epochs = 10

# Convert sequences to numerical format.
# In a real-world scenario, you might want to use tokenization libraries like
# spaCy or nltk. For simplicity, each word is mapped to a fixed index here.
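# As a hedged sketch of the "real-world" tokenization mentioned above, the two
# helpers below build a whitespace vocabulary from the training split and map
# unseen words to an <unk> index. The names `build_vocab` and `encode` are
# illustrative (not spaCy/nltk APIs); they are defined here but not used by
# the toy pipeline that follows.
def build_vocab(texts, specials=("<pad>", "<unk>")):
    vocab = {tok: idx for idx, tok in enumerate(specials)}
    for text in texts:
        for word in text.lower().split():
            vocab.setdefault(word, len(vocab))
    return vocab


def encode(text, vocab, unk="<unk>"):
    return [vocab.get(word, vocab[unk]) for word in text.lower().split()]

# To use them in place of the fixed toy indices below:
# vocab = build_vocab(train_sequences)
# vocab_size = len(vocab)
# sequence_indices = [encode(s, vocab) for s in train_sequences]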
sequence_indices = [
    [vocab_size // 2 if word == "python" else vocab_size // 3 for word in sequence.split()]
    for sequence in train_sequences
]


# Create DataLoader for training
class CourseDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return torch.tensor(self.sequences[idx]), torch.tensor(self.labels[idx])


def collate_batch(batch):
    # Sequences in a batch have different lengths, so pad them before stacking.
    # Index 0 is safe as padding here because the toy mapping never produces it.
    seqs, lbls = zip(*batch)
    seqs = nn.utils.rnn.pad_sequence(seqs, batch_first=True, padding_value=0)
    return seqs, torch.stack(lbls)


train_dataset = CourseDataset(sequence_indices, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_batch)

# Initialize the model, loss function, and optimizer. To train the Transformer
# variant instead, swap in:
# model = TransformerCourseRecommendationModel(vocab_size, embedding_dim, num_heads, num_layers, num_classes)
model = RNNCourseRecommendationModel(vocab_size, embedding_dim, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    for batch_seq, batch_labels in train_loader:
        optimizer.zero_grad()
        output = model(batch_seq)
        loss = criterion(output, batch_labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}")

# Save the trained model
# (use e.g. 'transformer_course_recommendation_model.pth' for the Transformer variant)
torch.save(model.state_dict(), 'course_recommendation_model.pth')

# Evaluate on test data (same preprocessing as for the training data)
test_sequence_indices = [
    [vocab_size // 2 if word == "python" else vocab_size // 3 for word in sequence.split()]
    for sequence in test_sequences
]
test_dataset = CourseDataset(test_sequence_indices, test_labels)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_batch)

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch_seq, batch_labels in test_loader:
        output = model(batch_seq)
        _, predicted = torch.max(output, 1)
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

accuracy = correct / total
print(f"Accuracy on test data: {accuracy * 100:.2f}%")
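# Hedged inference sketch: reload the saved weights and predict a label for a
# new course description. This assumes the same toy word-to-index mapping used
# during training; with a real tokenizer, encode the query the same way instead.
model = RNNCourseRecommendationModel(vocab_size, embedding_dim, hidden_size, num_classes)
model.load_state_dict(torch.load('course_recommendation_model.pth'))
model.eval()

new_sequence = "python for data analysis"  # hypothetical query, not from the dataset
new_indices = torch.tensor(
    [[vocab_size // 2 if word == "python" else vocab_size // 3 for word in new_sequence.split()]]
)
with torch.no_grad():
    logits = model(new_indices)
predicted_label = index2label[logits.argmax(dim=1).item()]
print(f"Predicted label for '{new_sequence}': {predicted_label}")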