From ccbbd775a921f92e5472ccf53317d9e31bd9152d Mon Sep 17 00:00:00 2001
From: xAlpharax <42233094+xAlpharax@users.noreply.github.com>
Date: Sun, 10 Dec 2023 08:27:00 +0200
Subject: Remade some parts of the readme and re-aligned the repo structure.

Changes to be committed:
	modified:   README.md
	renamed:    prototype.py -> ml-integrations/prototype.py
	new file:   testing/.gitkeep
	new file:   training/.gitkeep
---
 README.md                    |  43 ++++++++-----
 ml-integrations/prototype.py | 148 +++++++++++++++++++++++++++++++++++++++++++
 prototype.py                 | 148 -------------------------------------------
 testing/.gitkeep             |   1 +
 training/.gitkeep            |   1 +
 5 files changed, 176 insertions(+), 165 deletions(-)
 create mode 100644 ml-integrations/prototype.py
 delete mode 100644 prototype.py
 create mode 100644 testing/.gitkeep
 create mode 100644 training/.gitkeep

diff --git a/README.md b/README.md
index 22d6d0d..e059de2 100644
--- a/README.md
+++ b/README.md
@@ -10,9 +10,7 @@ Enhancing Learning Resources for University Students
 # FreeFlow
 
 My team's Code and Notes throughout BEST Brasov Hackathon @ https://hackathon.bestbrasov.ro/
 
-FreeFlow: Enhancing Learning Resources for University Students
-FreeFlow - Learn free in a different flow.
-Learning made easy
+FreeFlow - Learn free with a different flow.
 
 # Description / Proposition
 
@@ -26,21 +24,21 @@ LTI Support: Leveraging the Learning Tools Interoperability standard, FreeFlow s
 
 Node.js Backend: The backend is powered by Node.js, ensuring scalability, efficiency, and a robust server infrastructure.
 
-React Frontend: The frontend is built with React, providing a responsive and dynamic user interface for an optimal user experience.
+React Frontend: The frontend is built with React, providing a responsive and dynamic user interface for an optimal user experience within a single-page application, soon to become a PWA.
 
-Resource Recommendation: FreeFlow employs intelligent algorithms to recommend learning resources based on user behavior, preferences, and academic progress.
+Resource Recommendation: FreeFlow employs Artificial Intelligence to recommend learning resources based on user behavior, preferences, and academic progress.
 
 # Documentation
 
-FreeFlow comes with a simple work-*flow* . Hence, our documentation is straight forward.
+FreeFlow comes with a simple and straightforward work-*flow*.
 
 ## Requirements
 
-- Node.js (version X.X.X)
-- npm (version X.X.X)
-- MySQL (version X.X.X)
-- React ??
-- stuff
+- Node.js
+- npm
+- MySQL
+- Prisma
+- React
 
 ## Installation & Usage
 
@@ -56,7 +54,7 @@ Navigate to the project directory:
 cd FreeFlow
 ```
 
-Install dependencies:
+Install dependencies in each respective directory:
 
 ```bash
 npm install
@@ -65,16 +63,25 @@
 ```
 
 Configure environment variables:
 
 Create a .env file in the root directory.
-- Define the required environment variables.
+- Define the required environment variables. (e.g. DATABASE_URL="mysql://dbuser:passwd@ip:3306/freeflow")
 - Refer to .env.example for guidance.
 
+Optionally, populate the database with toy data:
+
+```bash
+# npx prisma migrate dev
+# node populate_prisma.js
+```
+
 Start the application:
 
 ```bash
-npm start
+node index.js
 ```
 
-Visit http://localhost:3000 in your browser.
+Visit http://localhost:12000 in your browser.
+
+Feel free to adapt this setup to your own deployment solution or any other means of serving the application to a wider audience.
 
 ## Contribuiting
 
@@ -84,8 +91,10 @@ Fork the repository.
 Create a new branch for your feature or bug fix.
 Make your changes and commit them with descriptive messages.
 Push your changes to your fork.
-Submit a pull request.
+Submit a pull request to this repo with a detailed description of your changes.
+
+And thank you! We look forward to reviewing and merging your contributions.
 
 ## License
 
-FreeFlow is software released under the GNU Affero General Public License (AGPL).
+FreeFlow is software released under the GNU Affero General Public License (AGPL); you can learn more about it [here](https://www.gnu.org/licenses/agpl-3.0.en.html).
diff --git a/ml-integrations/prototype.py b/ml-integrations/prototype.py
new file mode 100644
index 0000000..35461eb
--- /dev/null
+++ b/ml-integrations/prototype.py
@@ -0,0 +1,148 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+
+from sklearn.model_selection import train_test_split
+
+# Sample data - replace this with your dataset
+courses_data = [
+    {"sequence": "python programming", "label": "programming"},
+    {"sequence": "web development with HTML and CSS", "label": "web development"},
+    # Add more data...
+]
+
+# Preprocess the data
+sequences = [d["sequence"] for d in courses_data]
+labels = [d["label"] for d in courses_data]
+
+# Create a mapping from labels to unique indices
+label2index = {label: idx for idx, label in enumerate(set(labels))}
+index2label = {idx: label for label, idx in label2index.items()}
+
+# Convert labels to indices
+label_indices = [label2index[label] for label in labels]
+
+# Split the data into training and testing sets
+train_sequences, test_sequences, train_labels, test_labels = train_test_split(
+    sequences, label_indices, test_size=0.2, random_state=42
+)
+
+# Define a simple RNN model
+class CourseRecommendationModel(nn.Module):
+    def __init__(self, vocab_size, embedding_dim, hidden_size, num_classes):
+        super(CourseRecommendationModel, self).__init__()
+        self.embedding = nn.Embedding(vocab_size, embedding_dim)
+        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
+        self.fc = nn.Linear(hidden_size, num_classes)
+
+    def forward(self, x):
+        x = self.embedding(x)
+        _, hn = self.rnn(x)
+        output = self.fc(hn[-1, :, :])
+        return output
+
+##
+# Define a simple Transformer model (an encoder-only alternative to the RNN above).
+# A distinct class name is used so it does not shadow CourseRecommendationModel.
+class CourseRecommendationTransformer(nn.Module):
+    def __init__(self, vocab_size, embedding_dim, num_heads, num_layers, num_classes):
+        super(CourseRecommendationTransformer, self).__init__()
+        self.embedding = nn.Embedding(vocab_size, embedding_dim)
+        # For classification, self-attention over the input is enough, so an
+        # nn.TransformerEncoder stack is used rather than the full encoder-decoder nn.Transformer.
+        self.encoder = nn.TransformerEncoder(
+            nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=num_heads),
+            num_layers=num_layers,
+        )
+        self.fc = nn.Linear(embedding_dim, num_classes)
+
+    def forward(self, x):
+        x = self.embedding(x)
+        x = x.permute(1, 0, 2) # (batch, seq, embed) -> (seq, batch, embed)
+        output = self.encoder(x)
+        output = output.mean(dim=0) # Aggregate over the sequence dimension
+        output = self.fc(output)
+        return output
+
+# Hyperparameters (transformer)
+vocab_size = 10000 # Replace with the actual vocabulary size
+embedding_dim = 50
+num_heads = 5 # must divide embedding_dim evenly
+num_layers = 2
+num_classes = len(set(labels))
+batch_size = 32
+learning_rate = 0.001
+epochs = 10
+##
+
+# Hyperparameters
+vocab_size = 10000 # Replace with the actual vocabulary size
+embedding_dim = 50
+hidden_size = 64
+num_classes = len(set(labels))
+batch_size = 32
+learning_rate = 0.001
+epochs = 10
+
+# Convert sequences to numerical format
+# In a real-world scenario, you might want to use tokenization libraries like spaCy or nltk.
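+
+# --- Illustrative sketch (editorial addition, not part of the original prototype) ---
+# Assuming plain whitespace tokenization, a real vocabulary could be built from the
+# training split roughly like this; build_vocab and word2index are hypothetical names
+# and are not used by the rest of this script.
+def build_vocab(texts, reserved=("<pad>", "<unk>")):
+    """Map each whitespace-separated token to a unique integer index."""
+    vocab = {token: idx for idx, token in enumerate(reserved)}
+    for text in texts:
+        for token in text.lower().split():
+            vocab.setdefault(token, len(vocab))
+    return vocab
+
+word2index = build_vocab(train_sequences)
+# Unseen tokens would then map to the <unk> index, e.g.:
+# indices = [word2index.get(tok, word2index["<unk>"]) for tok in "python for beginners".split()]
+# --- End of sketch ---
+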
+# For simplicity, we'll represent each word with an index in this example.
+sequence_indices = [[vocab_size // 2 if word == "python" else vocab_size // 3 for word in sequence.split()] for sequence in train_sequences]
+
+# Create DataLoader for training
+# NOTE: sequences within a batch must share the same length; real data would need padding
+# (e.g. torch.nn.utils.rnn.pad_sequence) before batching.
+class CourseDataset(Dataset):
+    def __init__(self, sequences, labels):
+        self.sequences = sequences
+        self.labels = labels
+
+    def __len__(self):
+        return len(self.sequences)
+
+    def __getitem__(self, idx):
+        return torch.tensor(self.sequences[idx]), torch.tensor(self.labels[idx])
+
+train_dataset = CourseDataset(sequence_indices, train_labels)
+train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+
+## transformer
+# Initialize the model, loss function, and optimizer
+#model = CourseRecommendationTransformer(vocab_size, embedding_dim, num_heads, num_layers, num_classes)
+
+# Initialize the model, loss function, and optimizer
+model = CourseRecommendationModel(vocab_size, embedding_dim, hidden_size, num_classes)
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Training loop
+for epoch in range(epochs):
+    for batch_seq, batch_labels in train_loader:
+        optimizer.zero_grad()
+        output = model(batch_seq)
+        loss = criterion(output, batch_labels)
+        loss.backward()
+        optimizer.step()
+
+    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}")
+
+# Save the trained model
+#torch.save(model.state_dict(), 'transformer_course_recommendation_model.pth')
+torch.save(model.state_dict(), 'course_recommendation_model.pth')
+
+# Evaluate on test data (similar preprocessing as done for training data)
+test_sequence_indices = [[vocab_size // 2 if word == "python" else vocab_size // 3 for word in sequence.split()] for sequence in test_sequences]
+test_dataset = CourseDataset(test_sequence_indices, test_labels)
+test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
+
+model.eval()
+correct = 0
+total = 0
+
+with torch.no_grad():
+    for batch_seq, batch_labels in test_loader:
+        output = model(batch_seq)
+        _, predicted = torch.max(output, 1)
+        total += batch_labels.size(0)
+        correct += (predicted == batch_labels).sum().item()
+
+accuracy = correct / total
+print(f"Accuracy on test data: {accuracy * 100:.2f}%")
diff --git a/prototype.py b/prototype.py
deleted file mode 100644
index 35461eb..0000000
--- a/prototype.py
+++ /dev/null
@@ -1,148 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from torch.utils.data import Dataset, DataLoader
-
-from sklearn.model_selection import train_test_split
-
-# Sample data - replace this with your dataset
-courses_data = [
-    {"sequence": "python programming", "label": "programming"},
-    {"sequence": "web development with HTML and CSS", "label": "web development"},
-    # Add more data...
-]
-
-# Preprocess the data
-sequences = [d["sequence"] for d in courses_data]
-labels = [d["label"] for d in courses_data]
-
-# Create a mapping from labels to unique indices
-label2index = {label: idx for idx, label in enumerate(set(labels))}
-index2label = {idx: label for label, idx in label2index.items()}
-
-# Convert labels to indices
-label_indices = [label2index[label] for label in labels]
-
-# Split the data into training and testing sets
-train_sequences, test_sequences, train_labels, test_labels = train_test_split(
-    sequences, label_indices, test_size=0.2, random_state=42
-)
-
-# Define a simple RNN model
-class CourseRecommendationModel(nn.Module):
-    def __init__(self, vocab_size, embedding_dim, hidden_size, num_classes):
-        super(CourseRecommendationModel, self).__init__()
-        self.embedding = nn.Embedding(vocab_size, embedding_dim)
-        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
-        self.fc = nn.Linear(hidden_size, num_classes)
-
-    def forward(self, x):
-        x = self.embedding(x)
-        _, hn = self.rnn(x)
-        output = self.fc(hn[-1, :, :])
-        return output
-
-##
-# Define a simple Transformer model
-class CourseRecommendationModel(nn.Module):
-    def __init__(self, vocab_size, embedding_dim, hidden_size, num_heads, num_layers, num_classes):
-        super(CourseRecommendationModel, self).__init__()
-        self.embedding = nn.Embedding(vocab_size, embedding_dim)
-        self.transformer = nn.Transformer(
-            d_model=embedding_dim,
-            nhead=num_heads,
-            num_encoder_layers=num_layers,
-            num_decoder_layers=num_layers,
-        )
-        self.fc = nn.Linear(embedding_dim, num_classes)
-
-    def forward(self, x):
-        x = self.embedding(x)
-        x = x.permute(1, 0, 2) # Change the sequence length dimension
-        output = self.transformer(x)
-        output = output.mean(dim=0) # Aggregate over the sequence dimension
-        output = self.fc(output)
-        return output
-
-# Hyperparameters (transformer)
-vocab_size = 10000 # Replace with the actual vocabulary size
-embedding_dim = 50
-num_heads = 4
-num_layers = 2
-num_classes = len(set(labels))
-batch_size = 32
-learning_rate = 0.001
-epochs = 10
-##
-
-# Hyperparameters
-vocab_size = 10000 # Replace with the actual vocabulary size
-embedding_dim = 50
-hidden_size = 64
-num_classes = len(set(labels))
-batch_size = 32
-learning_rate = 0.001
-epochs = 10
-
-# Convert sequences to numerical format
-# In a real-world scenario, you might want to use tokenization libraries like spaCy or nltk.
-# For simplicity, we'll represent each word with an index in this example.
-sequence_indices = [[vocab_size // 2 if word == "python" else vocab_size // 3 for word in sequence.split()] for sequence in train_sequences]
-
-# Create DataLoader for training
-class CourseDataset(Dataset):
-    def __init__(self, sequences, labels):
-        self.sequences = sequences
-        self.labels = labels
-
-    def __len__(self):
-        return len(self.sequences)
-
-    def __getitem__(self, idx):
-        return torch.tensor(self.sequences[idx]), torch.tensor(self.labels[idx])
-
-train_dataset = CourseDataset(sequence_indices, train_labels)
-train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-
-## transformer
-# Initialize the model, loss function, and optimizer
-#model = CourseRecommendationModel(vocab_size, embedding_dim, embedding_dim, num_heads, num_layers, num_classes)
-
-# Initialize the model, loss function, and optimizer
-model = CourseRecommendationModel(vocab_size, embedding_dim, hidden_size, num_classes)
-criterion = nn.CrossEntropyLoss()
-optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-
-# Training loop
-for epoch in range(epochs):
-    for batch_seq, batch_labels in train_loader:
-        optimizer.zero_grad()
-        output = model(batch_seq)
-        loss = criterion(output, batch_labels)
-        loss.backward()
-        optimizer.step()
-
-    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}")
-
-# Save the trained model
-#torch.save(model.state_dict(), 'transformer_course_recommendation_model.pth')
-torch.save(model.state_dict(), 'course_recommendation_model.pth')
-
-# Evaluate on test data (similar preprocessing as done for training data)
-test_sequence_indices = [[vocab_size // 2 if word == "python" else vocab_size // 3 for word in sequence.split()] for sequence in test_sequences]
-test_dataset = CourseDataset(test_sequence_indices, test_labels)
-test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
-
-model.eval()
-correct = 0
-total = 0
-
-with torch.no_grad():
-    for batch_seq, batch_labels in test_loader:
-        output = model(batch_seq)
-        _, predicted = torch.max(output, 1)
-        total += batch_labels.size(0)
-        correct += (predicted == batch_labels).sum().item()
-
-accuracy = correct / total
-print(f"Accuracy on test data: {accuracy * 100:.2f}%")
diff --git a/testing/.gitkeep b/testing/.gitkeep
new file mode 100644
index 0000000..6c59be9
--- /dev/null
+++ b/testing/.gitkeep
@@ -0,0 +1 @@
+coherent dir structure helper
diff --git a/training/.gitkeep b/training/.gitkeep
new file mode 100644
index 0000000..6c59be9
--- /dev/null
+++ b/training/.gitkeep
@@ -0,0 +1 @@
+coherent dir structure helper
--
cgit v1.2.3
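As a closing illustration (not part of the patch above), here is a minimal sketch of how the checkpoint written by `ml-integrations/prototype.py` could be loaded for a single prediction. It assumes the `CourseRecommendationModel` class, the `index2label` mapping, the hyperparameters, and the toy word-to-index scheme are available exactly as defined in that script.

```python
import torch

# Assumption: CourseRecommendationModel, index2label, vocab_size, embedding_dim,
# hidden_size and num_classes are defined/imported as in ml-integrations/prototype.py.
model = CourseRecommendationModel(vocab_size, embedding_dim, hidden_size, num_classes)
model.load_state_dict(torch.load('course_recommendation_model.pth'))
model.eval()

# Encode a query with the same toy word-to-index scheme used during training.
query = "python programming"
indices = [vocab_size // 2 if word == "python" else vocab_size // 3 for word in query.split()]

with torch.no_grad():
    logits = model(torch.tensor([indices]))  # shape: (1, num_classes)
    predicted = torch.argmax(logits, dim=1).item()

print(f"Recommended topic: {index2label[predicted]}")
```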