From ccbbd775a921f92e5472ccf53317d9e31bd9152d Mon Sep 17 00:00:00 2001
From: xAlpharax <42233094+xAlpharax@users.noreply.github.com>
Date: Sun, 10 Dec 2023 08:27:00 +0200
Subject: Remade some parts of the readme and re-aligned the repo structure.

Changes to be committed:
	modified:   README.md
	renamed:    prototype.py -> ml-integrations/prototype.py
	new file:   testing/.gitkeep
	new file:   training/.gitkeep
---
 README.md                    |  43 ++++++++-----
 ml-integrations/prototype.py | 148 +++++++++++++++++++++++++++++++++++++++++++
 prototype.py                 | 148 -------------------------------------------
 testing/.gitkeep             |   1 +
 training/.gitkeep            |   1 +
 5 files changed, 176 insertions(+), 165 deletions(-)
 create mode 100644 ml-integrations/prototype.py
 delete mode 100644 prototype.py
 create mode 100644 testing/.gitkeep
 create mode 100644 training/.gitkeep

diff --git a/README.md b/README.md
index 22d6d0d..e059de2 100644
--- a/README.md
+++ b/README.md
@@ -10,9 +10,7 @@ Enhancing Learning Resources for University Students
 # FreeFlow
 
 My team's Code and Notes throughout BEST Brasov Hackathon @ https://hackathon.bestbrasov.ro/
 
-FreeFlow: Enhancing Learning Resources for University Students
-FreeFlow - Learn free in a different flow.
-Learning made easy
+FreeFlow - Learn free with a different flow.
 
 # Description / Proposition
 
@@ -26,21 +24,21 @@ LTI Support: Leveraging the Learning Tools Interoperability standard, FreeFlow s
 
 Node.js Backend: The backend is powered by Node.js, ensuring scalability, efficiency, and a robust server infrastructure.
 
-React Frontend: The frontend is built with React, providing a responsive and dynamic user interface for an optimal user experience.
+React Frontend: The frontend is built with React, providing a responsive and dynamic user interface for an optimal user experience within a single-page application, soon to become a PWA.
 
-Resource Recommendation: FreeFlow employs intelligent algorithms to recommend learning resources based on user behavior, preferences, and academic progress.
+Resource Recommendation: FreeFlow employs Artificial Intelligence to recommend learning resources based on user behavior, preferences, and academic progress.
 
 # Documentation
 
-FreeFlow comes with a simple work-*flow* . Hence, our documentation is straight forward.
+FreeFlow comes with a simple and straightforward work-*flow*.
 
 ## Requirements
 
-- Node.js (version X.X.X)
-- npm (version X.X.X)
-- MySQL (version X.X.X)
-- React ??
-- stuff
+- Node.js
+- npm
+- MySQL
+- Prisma
+- React
 
 ## Installation & Usage
 
@@ -56,7 +54,7 @@ Navigate to the project directory:
 cd FreeFlow
 ```
 
-Install dependencies:
+Install dependencies in each respective directory:
 
 ```bash
 npm install
@@ -65,16 +63,25 @@
 ```
 
 Configure environment variables:
 
 Create a .env file in the root directory.
-- Define the required environment variables.
+- Define the required environment variables. (e.g. DATABASE_URL="mysql://dbuser:passwd@ip:3306/freeflow")
 - Refer to .env.example for guidance.
 
+Optionally, populate the database with toy data:
+
+```bash
+# npx prisma migrate dev
+# node populate_prisma.js
+```
+
 Start the application:
 
 ```bash
-npm start
+node index.js
 ```
 
-Visit http://localhost:3000 in your browser.
+Visit http://localhost:12000 in your browser.
+
+Feel free to adapt this setup to your own deployment solution or any other means of serving the application to a wider audience.
 
 ## Contribuiting
 
@@ -84,8 +91,10 @@ Fork the repository.
 Create a new branch for your feature or bug fix.
 Make your changes and commit them with descriptive messages.
 Push your changes to your fork.
-Submit a pull request.
+Submit a pull request to this repo with a detailed description of your changes.
+
+And thank you! We look forward to reviewing and merging your contributions.
 
 ## License
 
-FreeFlow is software released under the GNU Affero General Public License (AGPL).
+FreeFlow is software released under the GNU Affero General Public License (AGPL); you can learn more about it [here](https://www.gnu.org/licenses/agpl-3.0.en.html).
diff --git a/ml-integrations/prototype.py b/ml-integrations/prototype.py
new file mode 100644
index 0000000..35461eb
--- /dev/null
+++ b/ml-integrations/prototype.py
@@ -0,0 +1,148 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+
+from sklearn.model_selection import train_test_split
+
+# Sample data - replace this with your dataset
+courses_data = [
+    {"sequence": "python programming", "label": "programming"},
+    {"sequence": "web development with HTML and CSS", "label": "web development"},
+    # Add more data...
+]
+
+# Preprocess the data
+sequences = [d["sequence"] for d in courses_data]
+labels = [d["label"] for d in courses_data]
+
+# Create a mapping from labels to unique indices
+label2index = {label: idx for idx, label in enumerate(set(labels))}
+index2label = {idx: label for label, idx in label2index.items()}
+
+# Convert labels to indices
+label_indices = [label2index[label] for label in labels]
+
+# Split the data into training and testing sets
+train_sequences, test_sequences, train_labels, test_labels = train_test_split(
+    sequences, label_indices, test_size=0.2, random_state=42
+)
+
+# Define a simple RNN model
+class CourseRecommendationModel(nn.Module):
+    def __init__(self, vocab_size, embedding_dim, hidden_size, num_classes):
+        super(CourseRecommendationModel, self).__init__()
+        self.embedding = nn.Embedding(vocab_size, embedding_dim)
+        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
+        self.fc = nn.Linear(hidden_size, num_classes)
+
+    def forward(self, x):
+        x = self.embedding(x)
+        _, hn = self.rnn(x)
+        output = self.fc(hn[-1, :, :])
+        return output
+
+##
+# Define a simple Transformer model (an encoder-only alternative to the RNN above).
+# A distinct class name is used so it does not shadow CourseRecommendationModel.
+class CourseRecommendationTransformer(nn.Module):
+    def __init__(self, vocab_size, embedding_dim, num_heads, num_layers, num_classes):
+        super(CourseRecommendationTransformer, self).__init__()
+        self.embedding = nn.Embedding(vocab_size, embedding_dim)
+        # For classification, self-attention over the input is enough, so an
+        # nn.TransformerEncoder stack is used rather than the full encoder-decoder nn.Transformer.
+        self.encoder = nn.TransformerEncoder(
+            nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=num_heads),
+            num_layers=num_layers,
+        )
+        self.fc = nn.Linear(embedding_dim, num_classes)
+
+    def forward(self, x):
+        x = self.embedding(x)
+        x = x.permute(1, 0, 2) # (batch, seq, embed) -> (seq, batch, embed)
+        output = self.encoder(x)
+        output = output.mean(dim=0) # Aggregate over the sequence dimension
+        output = self.fc(output)
+        return output
+
+# Hyperparameters (transformer)
+vocab_size = 10000 # Replace with the actual vocabulary size
+embedding_dim = 50
+num_heads = 5 # must divide embedding_dim evenly
+num_layers = 2
+num_classes = len(set(labels))
+batch_size = 32
+learning_rate = 0.001
+epochs = 10
+##
+
+# Hyperparameters
+vocab_size = 10000 # Replace with the actual vocabulary size
+embedding_dim = 50
+hidden_size = 64
+num_classes = len(set(labels))
+batch_size = 32
+learning_rate = 0.001
+epochs = 10
+
+# Convert sequences to numerical format
+# In a real-world scenario, you might want to use tokenization libraries like spaCy or nltk.
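+
+# --- Illustrative sketch (editorial addition, not part of the original prototype) ---
+# Assuming plain whitespace tokenization, a real vocabulary could be built from the
+# training split roughly like this; build_vocab and word2index are hypothetical names
+# and are not used by the rest of this script.
+def build_vocab(texts, reserved=("<pad>", "<unk>")):
+    """Map each whitespace-separated token to a unique integer index."""
+    vocab = {token: idx for idx, token in enumerate(reserved)}
+    for text in texts:
+        for token in text.lower().split():
+            vocab.setdefault(token, len(vocab))
+    return vocab
+
+word2index = build_vocab(train_sequences)
+# Unseen tokens would then map to the <unk> index, e.g.:
+# indices = [word2index.get(tok, word2index["<unk>"]) for tok in "python for beginners".split()]
+# --- End of sketch ---
+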
+# For simplicity, we'll represent each word with an index in this example.
+sequence_indices = [[vocab_size // 2 if word == "python" else vocab_size // 3 for word in sequence.split()] for sequence in train_sequences]
+
+# Create DataLoader for training
+# NOTE: sequences within a batch must share the same length; real data would need padding
+# (e.g. torch.nn.utils.rnn.pad_sequence) before batching.
+class CourseDataset(Dataset):
+    def __init__(self, sequences, labels):
+        self.sequences = sequences
+        self.labels = labels
+
+    def __len__(self):
+        return len(self.sequences)
+
+    def __getitem__(self, idx):
+        return torch.tensor(self.sequences[idx]), torch.tensor(self.labels[idx])
+
+train_dataset = CourseDataset(sequence_indices, train_labels)
+train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+
+## transformer
+# Initialize the model, loss function, and optimizer
+#model = CourseRecommendationTransformer(vocab_size, embedding_dim, num_heads, num_layers, num_classes)
+
+# Initialize the model, loss function, and optimizer
+model = CourseRecommendationModel(vocab_size, embedding_dim, hidden_size, num_classes)
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Training loop
+for epoch in range(epochs):
+    for batch_seq, batch_labels in train_loader:
+        optimizer.zero_grad()
+        output = model(batch_seq)
+        loss = criterion(output, batch_labels)
+        loss.backward()
+        optimizer.step()
+
+    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}")
+
+# Save the trained model
+#torch.save(model.state_dict(), 'transformer_course_recommendation_model.pth')
+torch.save(model.state_dict(), 'course_recommendation_model.pth')
+
+# Evaluate on test data (similar preprocessing as done for training data)
+test_sequence_indices = [[vocab_size // 2 if word == "python" else vocab_size // 3 for word in sequence.split()] for sequence in test_sequences]
+test_dataset = CourseDataset(test_sequence_indices, test_labels)
+test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
+
+model.eval()
+correct = 0
+total = 0
+
+with torch.no_grad():
+    for batch_seq, batch_labels in test_loader:
+        output = model(batch_seq)
+        _, predicted = torch.max(output, 1)
+        total += batch_labels.size(0)
+        correct += (predicted == batch_labels).sum().item()
+
+accuracy = correct / total
+print(f"Accuracy on test data: {accuracy * 100:.2f}%")
diff --git a/prototype.py b/prototype.py
deleted file mode 100644
index 35461eb..0000000
--- a/prototype.py
+++ /dev/null
@@ -1,148 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from torch.utils.data import Dataset, DataLoader
-
-from sklearn.model_selection import train_test_split
-
-# Sample data - replace this with your dataset
-courses_data = [
-    {"sequence": "python programming", "label": "programming"},
-    {"sequence": "web development with HTML and CSS", "label": "web development"},
-    # Add more data...
-]
-
-# Preprocess the data
-sequences = [d["sequence"] for d in courses_data]
-labels = [d["label"] for d in courses_data]
-
-# Create a mapping from labels to unique indices
-label2index = {label: idx for idx, label in enumerate(set(labels))}
-index2label = {idx: label for label, idx in label2index.items()}
-
-# Convert labels to indices
-label_indices = [label2index[label] for label in labels]
-
-# Split the data into training and testing sets
-train_sequences, test_sequences, train_labels, test_labels = train_test_split(
-    sequences, label_indices, test_size=0.2, random_state=42
-)
-
-# Define a simple RNN model
-class CourseRecommendationModel(nn.Module):
-    def __init__(self, vocab_size, embedding_dim, hidden_size, num_classes):
-        super(CourseRecommendationModel, self).__init__()
-        self.embedding = nn.Embedding(vocab_size, embedding_dim)
-        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
-        self.fc = nn.Linear(hidden_size, num_classes)
-
-    def forward(self, x):
-        x = self.embedding(x)
-        _, hn = self.rnn(x)
-        output = self.fc(hn[-1, :, :])
-        return output
-
-##
-# Define a simple Transformer model
-class CourseRecommendationModel(nn.Module):
-    def __init__(self, vocab_size, embedding_dim, hidden_size, num_heads, num_layers, num_classes):
-        super(CourseRecommendationModel, self).__init__()
-        self.embedding = nn.Embedding(vocab_size, embedding_dim)
-        self.transformer = nn.Transformer(
-            d_model=embedding_dim,
-            nhead=num_heads,
-            num_encoder_layers=num_layers,
-            num_decoder_layers=num_layers,
-        )
-        self.fc = nn.Linear(embedding_dim, num_classes)
-
-    def forward(self, x):
-        x = self.embedding(x)
-        x = x.permute(1, 0, 2) # Change the sequence length dimension
-        output = self.transformer(x)
-        output = output.mean(dim=0) # Aggregate over the sequence dimension
-        output = self.fc(output)
-        return output
-
-# Hyperparameters (transformer)
-vocab_size = 10000 # Replace with the actual vocabulary size
-embedding_dim = 50
-num_heads = 4
-num_layers = 2
-num_classes = len(set(labels))
-batch_size = 32
-learning_rate = 0.001
-epochs = 10
-##
-
-# Hyperparameters
-vocab_size = 10000 # Replace with the actual vocabulary size
-embedding_dim = 50
-hidden_size = 64
-num_classes = len(set(labels))
-batch_size = 32
-learning_rate = 0.001
-epochs = 10
-
-# Convert sequences to numerical format
-# In a real-world scenario, you might want to use tokenization libraries like spaCy or nltk.
-# For simplicity, we'll represent each word with an index in this example.
-sequence_indices = [[vocab_size // 2 if word == "python" else vocab_size // 3 for word in sequence.split()] for sequence in train_sequences]
-
-# Create DataLoader for training
-class CourseDataset(Dataset):
-    def __init__(self, sequences, labels):
-        self.sequences = sequences
-        self.labels = labels
-
-    def __len__(self):
-        return len(self.sequences)
-
-    def __getitem__(self, idx):
-        return torch.tensor(self.sequences[idx]), torch.tensor(self.labels[idx])
-
-train_dataset = CourseDataset(sequence_indices, train_labels)
-train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-
-## transformer
-# Initialize the model, loss function, and optimizer
-#model = CourseRecommendationModel(vocab_size, embedding_dim, embedding_dim, num_heads, num_layers, num_classes)
-
-# Initialize the model, loss function, and optimizer
-model = CourseRecommendationModel(vocab_size, embedding_dim, hidden_size, num_classes)
-criterion = nn.CrossEntropyLoss()
-optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-
-# Training loop
-for epoch in range(epochs):
-    for batch_seq, batch_labels in train_loader:
-        optimizer.zero_grad()
-        output = model(batch_seq)
-        loss = criterion(output, batch_labels)
-        loss.backward()
-        optimizer.step()
-
-    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}")
-
-# Save the trained model
-#torch.save(model.state_dict(), 'transformer_course_recommendation_model.pth')
-torch.save(model.state_dict(), 'course_recommendation_model.pth')
-
-# Evaluate on test data (similar preprocessing as done for training data)
-test_sequence_indices = [[vocab_size // 2 if word == "python" else vocab_size // 3 for word in sequence.split()] for sequence in test_sequences]
-test_dataset = CourseDataset(test_sequence_indices, test_labels)
-test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
-
-model.eval()
-correct = 0
-total = 0
-
-with torch.no_grad():
-    for batch_seq, batch_labels in test_loader:
-        output = model(batch_seq)
-        _, predicted = torch.max(output, 1)
-        total += batch_labels.size(0)
-        correct += (predicted == batch_labels).sum().item()
-
-accuracy = correct / total
-print(f"Accuracy on test data: {accuracy * 100:.2f}%")
diff --git a/testing/.gitkeep b/testing/.gitkeep
new file mode 100644
index 0000000..6c59be9
--- /dev/null
+++ b/testing/.gitkeep
@@ -0,0 +1 @@
+coherent dir structure helper
diff --git a/training/.gitkeep b/training/.gitkeep
new file mode 100644
index 0000000..6c59be9
--- /dev/null
+++ b/training/.gitkeep
@@ -0,0 +1 @@
+coherent dir structure helper
--
cgit v1.2.3
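As a closing illustration (not part of the patch above), here is a minimal sketch of how the checkpoint written by `ml-integrations/prototype.py` could be loaded for a single prediction. It assumes the `CourseRecommendationModel` class, the `index2label` mapping, the hyperparameters, and the toy word-to-index scheme are available exactly as defined in that script.

```python
import torch

# Assumption: CourseRecommendationModel, index2label, vocab_size, embedding_dim,
# hidden_size and num_classes are defined/imported as in ml-integrations/prototype.py.
model = CourseRecommendationModel(vocab_size, embedding_dim, hidden_size, num_classes)
model.load_state_dict(torch.load('course_recommendation_model.pth'))
model.eval()

# Encode a query with the same toy word-to-index scheme used during training.
query = "python programming"
indices = [vocab_size // 2 if word == "python" else vocab_size // 3 for word in query.split()]

with torch.no_grad():
    logits = model(torch.tensor([indices]))  # shape: (1, num_classes)
    predicted = torch.argmax(logits, dim=1).item()

print(f"Recommended topic: {index2label[predicted]}")
```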