# -*- coding: utf-8 -*-
"""Minimal OpenAI chat-completions quickstart.

Loads credentials from a local .env file, asks gpt-3.5-turbo for a poem
about recursion, and prints the raw message object of the first choice.
"""
from dotenv import load_dotenv
from openai import OpenAI

# Pull OPENAI_API_KEY (and any other settings) from .env before the client is built.
load_dotenv()

if __name__ == "__main__":
    system_message = {
        "role": "system",
        "content": "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.",
    }
    user_message = {
        "role": "user",
        "content": "Compose a poem that explains the concept of recursion in programming.",
    }

    client = OpenAI()
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
    )

    print(completion.choices[0].message)
# -*- coding: utf-8 -*-
"""Create OpenAI embeddings for the text content of a PDF file."""
from dotenv import load_dotenv

# Load credentials before any OpenAI client is constructed.
load_dotenv()

import numpy as np
import openai
from pypdf import PdfReader


class EmbeddingService:
    """Read a PDF, split it into fixed-size text chunks and embed them.

    Workflow: ``read_pdf()`` fills ``parsed_chunks``; ``get_embeddings()``
    then fills ``embeddings`` with the raw OpenAI embeddings response.
    """

    def __init__(self, pdf_path: str):
        self.openai_client = openai.OpenAI()
        self.pdf_path = pdf_path
        # Populated lazily by read_pdf() and get_embeddings() respectively.
        self.parsed_chunks = None
        self.embeddings = None

    def read_pdf(self, chunk_length: int):
        """Split every page of the PDF into chunks of ``chunk_length`` characters.

        Newlines are replaced by spaces so each chunk is a single line of text.
        """
        reader = PdfReader(self.pdf_path)
        parsed = []
        for page in reader.pages:
            page_text = page.extract_text()
            for start in range(0, len(page_text), chunk_length):
                parsed.append(
                    page_text[start : start + chunk_length].replace("\n", " ")
                )
        self.parsed_chunks = parsed

    def get_embeddings(self, model="text-embedding-ada-002"):
        """Request embeddings for the parsed chunks and store the raw response."""
        # The embeddings endpoint expects a list; wrap a single chunk if needed.
        if isinstance(self.parsed_chunks, list):
            chunks = self.parsed_chunks
        else:
            chunks = [self.parsed_chunks]
        self.embeddings = self.openai_client.embeddings.create(
            input=chunks, model=model
        )

    def test_openai_client(self):
        """Smoke-test the chat endpoint by printing one sample completion."""
        completion = self.openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.",
                },
                {
                    "role": "user",
                    "content": "Compose a poem that explains the concept of recursion in programming.",
                },
            ],
        )

        print(completion.choices[0].message)


if __name__ == "__main__":
    embed_service = EmbeddingService(pdf_path="data/pdf-example.pdf")
    embed_service.read_pdf(1000)
    print(embed_service.parsed_chunks[-1])
    embed_service.get_embeddings()
    embeddings = embed_service.embeddings
    parsed_chunks = embed_service.parsed_chunks
    print(
        f"We should have {len(embeddings.data)} different vector embeddings for {len(parsed_chunks)} different parsed chunks."
    )
# -*- coding: utf-8 -*-
# --- scripts/question_answering_on_pdf/dataservice.py ---
"""Store PDF-chunk embeddings in a RediSearch vector index and query them."""
import numpy as np
import openai
from pypdf import PdfReader
from redis.commands.search.field import TextField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query

import redis

INDEX_NAME = "embeddings-index"  # name of the search index
PREFIX = "doc"  # prefix for the document keys
# distance metric for the vectors (ex. COSINE, IP, L2)
DISTANCE_METRIC = "COSINE"

REDIS_HOST = "localhost"
REDIS_PORT = 6379
REDIS_PASSWORD = ""


class DataService:
    """Thin wrapper around a Redis connection for embedding storage/search."""

    def __init__(self):
        # Connect to Redis
        self.redis_client = redis.Redis(
            host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD
        )

    def drop_redis_data(self, index_name: str = INDEX_NAME):
        """Drop the search index if it exists; a missing index is not an error."""
        try:
            self.redis_client.ft(index_name).dropindex()
            print('Index dropped')
        except redis.exceptions.ResponseError:
            # Index does not exist — narrowed from a bare `except:` so real
            # connection errors are no longer silently swallowed.
            print('Index does not exist')

    def load_data_to_redis(self, embeddings):
        """Create the vector index (if absent) and HSET every embedding record.

        ``embeddings`` is a list of dicts with keys ``id``, ``vector``, ``text``
        as produced by :meth:`pdf_to_embeddings`.
        """
        vector_dim = len(embeddings[0]['vector'])  # length of the vectors
        vector_number = len(embeddings)  # initial number of vectors

        # Define RediSearch fields: plain text plus a FLAT float32 vector field.
        text = TextField(name="text")
        text_embedding = VectorField(
            "vector",
            "FLAT",
            {
                "TYPE": "FLOAT32",
                "DIM": vector_dim,
                "DISTANCE_METRIC": "COSINE",
                "INITIAL_CAP": vector_number,
            },
        )
        fields = [text, text_embedding]

        # Create the index only when it does not already exist; ft().info()
        # raises ResponseError for an unknown index.
        try:
            self.redis_client.ft(INDEX_NAME).info()
            print("Index already exists")
        except redis.exceptions.ResponseError:
            self.redis_client.ft(INDEX_NAME).create_index(
                fields=fields,
                definition=IndexDefinition(prefix=[PREFIX], index_type=IndexType.HASH),
            )

        for embedding in embeddings:
            key = f"{PREFIX}:{str(embedding['id'])}"
            # Redis hashes store bytes; serialize the vector as packed float32.
            embedding["vector"] = np.array(
                embedding["vector"], dtype=np.float32
            ).tobytes()
            self.redis_client.hset(key, mapping=embedding)
        print(
            f"Loaded {self.redis_client.info()['db0']['keys']} documents in Redis search index with name: {INDEX_NAME}")

    def pdf_to_embeddings(self, pdf_path: str, chunk_length: int = 1000):
        """Read a PDF, chunk its text and return embedding records.

        Returns a list of ``{'id': ..., 'vector': ..., 'text': ...}`` dicts.
        """
        # Read data from pdf file and split it into chunks
        reader = PdfReader(pdf_path)
        chunks = []
        for page in reader.pages:
            text_page = page.extract_text()
            chunks.extend(
                text_page[i:i + chunk_length].replace('\n', '')
                for i in range(0, len(text_page), chunk_length)
            )

        # NOTE(review): openai.Embedding is the pre-1.0 SDK API, but
        # pyproject.toml pins `openai = ">1"`, whose client removed it —
        # this call will raise at runtime under openai>=1. Migrate to
        # client.embeddings.create(...) or relax the pin; confirm intent.
        response = openai.Embedding.create(
            model='text-embedding-ada-002', input=chunks)
        return [{'id': value['index'], 'vector': value['embedding'], 'text': chunks[value['index']]} for value in response['data']]

    def search_redis(self,
                     user_query: str,
                     index_name: str = "embeddings-index",
                     vector_field: str = "vector",
                     return_fields: list = None,
                     hybrid_fields="*",
                     k: int = 5,
                     print_results: bool = False,
                     ):
        """KNN-search the index with the embedded user query; return the texts."""
        # Avoid a shared mutable default argument; behavior is unchanged.
        if return_fields is None:
            return_fields = ["text", "vector_score"]
        # Creates embedding vector from user query (legacy API — see NOTE above).
        embedded_query = openai.Embedding.create(input=user_query,
                                                 model="text-embedding-ada-002",
                                                 )["data"][0]['embedding']
        # Prepare the KNN query: top-k nearest vectors, distance as vector_score.
        base_query = f'{hybrid_fields}=>[KNN {k} @{vector_field} $vector AS vector_score]'
        query = (
            Query(base_query)
            .return_fields(*return_fields)
            .sort_by("vector_score")
            .paging(0, k)
            .dialect(2)
        )
        params_dict = {"vector": np.array(
            embedded_query).astype(dtype=np.float32).tobytes()}
        # Perform the vector search.
        results = self.redis_client.ft(index_name).search(query, params_dict)
        if print_results:
            for i, doc in enumerate(results.docs):
                # Cosine distance -> similarity score.
                score = 1 - float(doc.vector_score)
                print(f"{i}. {doc.text} (Score: {round(score, 3)})")
        return [doc['text'] for doc in results.docs]


# --- scripts/question_answering_on_pdf/intentservice.py ---


class IntentService:
    """Extracts search keywords ("intent") from a user question via the LLM."""

    def __init__(self):
        pass

    def get_intent(self, user_question: str):
        """Return only the keywords of ``user_question`` as a string."""
        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK API and is
        # removed in openai>=1 (the pinned version) — confirm/migrate.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": f'Extract the keywords from the following question: {user_question}' +
                 'Do not answer anything else, only the keywords.'}
            ]
        )

        # Extract the assistant's reply text.
        return (response['choices'][0]['message']['content'])
# -*- coding: utf-8 -*-
# --- scripts/question_answering_on_pdf/responseservice.py ---
import openai


class ResponseService:
    """Generates the final answer to a question from retrieved facts."""

    def __init__(self):
        pass

    def generate_response(self, facts, user_question):
        """Ask the chat model to answer ``user_question`` using ``facts``."""
        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK API; the
        # project pins openai>1, where it was removed — confirm/migrate.
        prompt = (
            'Based on the FACTS, give an answer to the QUESTION.'
            + f'QUESTION: {user_question}. FACTS: {facts}'
        )
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
        )

        # Extract the assistant's reply text.
        return (response['choices'][0]['message']['content'])


# --- scripts/question_answering_on_pdf/run.py ---
"""End-to-end demo: index a PDF in Redis, then answer a question about it."""
from dotenv import load_dotenv

load_dotenv()

from scripts.question_answering_on_pdf.intentservice import IntentService
from scripts.question_answering_on_pdf.responseservice import ResponseService
from scripts.question_answering_on_pdf.dataservice import DataService

# Example pdf
pdf = 'data/pdf-example.pdf'

data_service = DataService()

# Start from a clean index, then load the PDF's embeddings into Redis.
data_service.drop_redis_data()
data = data_service.pdf_to_embeddings(pdf)
data_service.load_data_to_redis(data)

intent_service = IntentService()
response_service = ResponseService()

# Question -> keywords -> retrieved facts -> generated answer.
question = "Can you explain to me the summary of the paper?"
intents = intent_service.get_intent(question)
facts = data_service.search_redis(intents)
answer = response_service.generate_response(facts, question)
print(answer)
# -*- coding: utf-8 -*-
# --- ai_teacher/exam_generator/exam_creator.py ---
"""Generate a multiple-choice exam (as JSON) from document content."""
import os

from dotenv import load_dotenv

load_dotenv()

from openai import OpenAI


class ExamCreator:
    """Wraps the OpenAI chat API to produce a JSON multiple-choice exam."""

    def __init__(self):
        self.client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

    def generate_questions(self, content):
        """Generates multiple-choice questions based on the provided content.

        Returns the model's JSON string, or None if the API call fails.
        """
        try:
            response = self.client.chat.completions.create(
                model="gpt-3.5-turbo-1106",
                response_format={"type": "json_object"},
                messages=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant designed to output a multiple choice exam as a JSON.",
                    },
                    {"role": "user", "content": self._create_prompt(content)},
                ],
            )
            return response.choices[0].message.content
        # Broad catch is deliberate here: any SDK/network failure is reported
        # and the caller receives None instead of a crash.
        except Exception as e:
            print(f"Error in generating questions: {e}")
            return None

    @staticmethod
    def _create_prompt(content):
        """Creates a prompt for the OpenAI API based on the content.

        Fix: the prompt promised "content between the content tags" but the
        original never wrapped the content in any tags — add them so the
        instruction matches the payload.
        """
        prompt = (
            "Create 5 multiple-choice questions based on content between the content tags: "
            f"\n\n<content>\n{content}\n</content>\n\n"
        )
        return prompt


# Test the exam creator (you can remove this test in your actual application)
if __name__ == "__main__":
    exam_creator = ExamCreator()
    sample_content = "Your sample text content here."
    questions = exam_creator.generate_questions(sample_content)
    print(questions)


# --- ai_teacher/file_processor/docx_processor.py ---
import docx


class DOCXProcessor:
    @staticmethod
    def process_docx(file_path):
        """Processes the Word file and extracts its content.

        Returns all paragraph texts joined by single spaces.
        """
        doc = docx.Document(file_path)
        content = [paragraph.text for paragraph in doc.paragraphs]
        return " ".join(content)


# Test the processor (you can remove this test in your actual application)
if __name__ == "__main__":
    docx_content = DOCXProcessor.process_docx("path_to_your_docx.docx")
    print(docx_content)


# --- ai_teacher/file_processor/pdf_processor.py ---
import PyPDF2


class PDFProcessor:
    @staticmethod
    def process_pdf(file_path):
        """Processes the PDF file and extracts its content.

        NOTE(review): PdfFileReader/numPages/getPage/extractText are the
        PyPDF2 1.x API, consistent with the `PyPDF2 = "1.28.6"` pin in
        pyproject.toml; they were removed in PyPDF2>=3 — keep the pin.
        """
        with open(file_path, "rb") as file:
            pdf_reader = PyPDF2.PdfFileReader(file)
            content = []
            for page in range(pdf_reader.numPages):
                content.append(pdf_reader.getPage(page).extractText())
            return " ".join(content)


# Test the processor (you can remove this test in your actual application)
if __name__ == "__main__":
    pdf_content = PDFProcessor.process_pdf("path_to_your_pdf.pdf")
    print(pdf_content)


# --- ai_teacher/file_processor/ppt_processor.py ---
from pptx import Presentation


class PPTProcessor:
    @staticmethod
    def process_ppt(file_path):
        """Processes the PowerPoint file and extracts text from each slide.

        Only shapes that expose a ``text`` attribute contribute content.
        """
        prs = Presentation(file_path)
        content = []
        for slide in prs.slides:
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    content.append(shape.text)
        return " ".join(content)


# Test the processor (you can remove this test in your actual application)
if __name__ == "__main__":
    ppt_content = PPTProcessor.process_ppt("path_to_your_ppt.pptx")
    print(ppt_content)
# -*- coding: utf-8 -*-
# --- ai_teacher/main.py ---
"""CLI entry point: upload a document, generate an exam, export it as PDF."""
from exam_generator.exam_creator import ExamCreator
from file_processor.docx_processor import DOCXProcessor
from file_processor.pdf_processor import PDFProcessor
from file_processor.ppt_processor import PPTProcessor
from pdf_creator.pdf_exporter import PDFExporter
from utils.file_uploader import FileUploader


def process_file(file_path):
    """Determines the file type and processes it using the appropriate processor.

    Returns the extracted text, or None for unsupported extensions.
    """
    if file_path.endswith(".pdf"):
        return PDFProcessor.process_pdf(file_path)
    elif file_path.endswith(".pptx"):
        return PPTProcessor.process_ppt(file_path)
    elif file_path.endswith(".docx"):
        return DOCXProcessor.process_docx(file_path)
    else:
        print("Unsupported file format.")
        return None


def main():
    """Run the full pipeline: upload -> extract -> generate -> export PDF."""
    print("Welcome to the Exam Generator!")
    file_path = FileUploader.upload_file()
    content = process_file(file_path)

    if content:
        print("File processed successfully.")
        exam_creator = ExamCreator()
        exam_questions = exam_creator.generate_questions(content)
        if exam_questions:
            print("Exam generated successfully.")
            pdf_exporter = PDFExporter()
            pdf_exporter.create_pdf(exam_questions, "generated_exam.pdf")
            print("Exam exported as PDF successfully.")
        else:
            print("Failed to generate exam.")
    else:
        print("Failed to process the file.")


if __name__ == "__main__":
    main()


# --- ai_teacher/pdf_creator/pdf_exporter.py ---
from fpdf import FPDF


class PDFExporter:
    """Exports plain-text content to a PDF file."""

    def __init__(self):
        self.pdf = FPDF()

    def create_pdf(self, content, file_name="exam.pdf"):
        """Creates a PDF file from the provided content.

        Fix: the original reused one FPDF instance across calls, so a second
        export accumulated the first export's pages into the new file. A
        fresh document is now built per call (still stored on ``self.pdf``
        for backward compatibility).
        """
        self.pdf = FPDF()
        self.pdf.add_page()
        self.pdf.set_font("Arial", size=12)
        self.pdf.multi_cell(0, 10, content)
        self.pdf.output(file_name)


# Test the PDF exporter (you can remove this test in your actual application)
if __name__ == "__main__":
    exporter = PDFExporter()
    sample_content = "Your sample exam content here."
    exporter.create_pdf(sample_content, "sample_exam.pdf")


# --- ai_teacher/utils/file_uploader.py ---
import os


class FileUploader:
    @staticmethod
    def upload_file():
        """Prompts the user to enter the file path and checks if the file exists.

        Returns the path if the file exists, otherwise prompts again.
        """
        while True:
            file_path = input("Enter the path of your file: ")
            if os.path.isfile(file_path):
                return file_path
            else:
                print("File not found. Please try again.")


# Test the uploader (you can remove this test in your actual application)
if __name__ == "__main__":
    uploader = FileUploader()
    file_path = uploader.upload_file()
    print(f"File {file_path} uploaded successfully.")