Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 33 additions & 8 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the student files
student_files = [doc for doc in os.listdir() if doc.endswith('.txt')]


def _read_text(path):
    """Return the whole content of *path* decoded as UTF-8.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the text has been read, instead of being left for the garbage
    collector.
    """
    with open(path, encoding='utf-8') as handle:
        return handle.read()


# One text per entry of student_files, in the same order.
student_notes = [_read_text(_file) for _file in student_files]

# Vectorize the text
def vectorize(Text):
    """Return the TF-IDF representation of the documents in *Text* as a dense array.

    A new ``TfidfVectorizer`` is created and fitted on every call, so the
    result depends only on the documents passed in.
    """
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(Text)
    return tfidf_matrix.toarray()
def similarity(doc1, doc2):
    """Return the cosine-similarity matrix computed over *doc1* and *doc2*.

    The two vectors are passed to ``cosine_similarity`` together as a
    two-row input; callers index into the returned matrix to pick the
    doc1-vs-doc2 entry.
    """
    stacked = [doc1, doc2]
    return cosine_similarity(stacked)

# TF-IDF vectors for the loaded notes, as returned by vectorize().
vectors = vectorize(student_notes)
# Pair every file name with its vector so comparisons can report names.
s_vectors = [*zip(student_files, vectors)]
# Accumulates the scores added by check_plagiarism().
plagiarism_results = set()


# Check plagiarism
def check_plagiarism():
global s_vectors
for student_a, text_vector_a in s_vectors:
Expand All @@ -29,6 +33,27 @@ def check_plagiarism():
plagiarism_results.add(score)
return plagiarism_results

# Visualize plagiarism results
def visualize_results(plagiarism_results):
    """Show a heatmap of the pairwise similarity scores between files.

    Args:
        plagiarism_results: Re-iterable collection (e.g. a set or list) of
            ``(file_a, file_b, score)`` tuples.

    Returns:
        None. The figure is displayed with ``plt.show()``. When there are
        no results nothing is drawn and the function returns immediately.
    """
    # Collect every file name that appears on either side of a pair.
    names = set()
    for file_a, file_b, _score in plagiarism_results:
        names.update((file_a, file_b))
    files = sorted(names)

    # An empty matrix gives the heatmap no values to derive a colour range
    # from, so skip plotting instead of failing inside the plotting call.
    if not files:
        return

    file_index = {name: idx for idx, name in enumerate(files)}

    # Square matrix in sorted file order; cells with no recorded pair
    # (including the diagonal, unless a self-pair is present) stay 0.
    size = len(files)
    matrix = [[0] * size for _ in range(size)]

    for file_a, file_b, score in plagiarism_results:
        i, j = file_index[file_a], file_index[file_b]
        # Write both (i, j) and (j, i) so the heatmap is symmetric.
        matrix[i][j] = score
        matrix[j][i] = score

    # Plot the heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(matrix, xticklabels=files, yticklabels=files, cmap='coolwarm', annot=True)
    plt.title('Plagiarism Heatmap')
    plt.show()

# Check for plagiarism and visualize the results
plagiarism_results = check_plagiarism()
visualize_results(plagiarism_results)

# Print the results computed above instead of calling check_plagiarism()
# a second time, which would repeat the whole pairwise comparison.
for data in plagiarism_results:
    print(data)
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
scikit_learn==0.24.2
scikit-learn==0.24.2
matplotlib==3.4.2
seaborn==0.11.1