From d80c4470853e710f5b076a3b3f83ac8d2b293ed6 Mon Sep 17 00:00:00 2001
From: Tim Ruscica <35348871+techwithtim@users.noreply.github.com>
Date: Thu, 17 Oct 2024 17:31:56 +0400
Subject: [PATCH 1/9] Updated README via script

---
 README.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5a4fcaf..90b4266 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,14 @@
-# PythonAgentAI
\ No newline at end of file
+# PythonAgentAI
+
+# 💻 Launch Your Software Development Career Today!  
+
+🎓 **No degree? No problem!** My program equips you with everything you need to break into tech and land an entry-level software development role.  
+
+🚀 **Why Join?**  
+- 💼 **$70k+ starting salary potential**  
+- 🕐 **Self-paced:** Complete on your own time  
+- 🤑 **Affordable:** Low risk compared to expensive bootcamps or degrees
+- 🎯 **45,000+ job openings** in the market  
+
+👉 **[Start your journey today!](https://techwithtim.net/dev)**  
+No experience needed—just your determination. Future-proof your career and unlock six-figure potential like many of our students have!  

From 55f7321bc19d7e4e25685a4b10c33a77b94ab3b2 Mon Sep 17 00:00:00 2001
From: mahmoud <mahmoud.bidry11@gmail.com>
Date: Sun, 2 Mar 2025 23:02:22 +0000
Subject: [PATCH 2/9] Enhance the agent by upgrading LlamaIndex to support all
 OpenAI models, including GPT-4o mini

---
 main.py          | 16 +++++++-------
 note_engine.py   |  2 +-
 pdf.py           |  4 ++--
 prompts.py       |  2 +-
 requirements.txt | 55 ++++--------------------------------------------
 5 files changed, 16 insertions(+), 63 deletions(-)

diff --git a/main.py b/main.py
index 72b53f0..4a274aa 100644
--- a/main.py
+++ b/main.py
@@ -1,12 +1,13 @@
 from dotenv import load_dotenv
 import os
 import pandas as pd
-from llama_index.query_engine import PandasQueryEngine
+from llama_index.experimental.query_engine import PandasQueryEngine
 from prompts import new_prompt, instruction_str, context
 from note_engine import note_engine
-from llama_index.tools import QueryEngineTool, ToolMetadata
-from llama_index.agent import ReActAgent
-from llama_index.llms import OpenAI
+from llama_index.core.tools import QueryEngineTool, ToolMetadata
+from llama_index.core.agent import ReActAgent
+from llama_index.llms.openai import OpenAI
+
 from pdf import canada_engine
 
 load_dotenv()
@@ -14,9 +15,7 @@
 population_path = os.path.join("data", "population.csv")
 population_df = pd.read_csv(population_path)
 
-population_query_engine = PandasQueryEngine(
-    df=population_df, verbose=True, instruction_str=instruction_str
-)
+population_query_engine = PandasQueryEngine(df=population_df, verbose=True, instruction_str=instruction_str)
 population_query_engine.update_prompts({"pandas_prompt": new_prompt})
 
 tools = [
@@ -37,7 +36,8 @@
     ),
 ]
 
-llm = OpenAI(model="gpt-3.5-turbo-0613")
+# llm = OpenAI(model="gpt-3.5-turbo")
+llm = OpenAI(model="gpt-4o-mini-2024-07-18")
 agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context)
 
 while (prompt := input("Enter a prompt (q to quit): ")) != "q":
diff --git a/note_engine.py b/note_engine.py
index 577cc2f..6f925fa 100644
--- a/note_engine.py
+++ b/note_engine.py
@@ -1,4 +1,4 @@
-from llama_index.tools import FunctionTool
+from llama_index.core.tools import FunctionTool
 import os
 
 note_file = os.path.join("data", "notes.txt")
diff --git a/pdf.py b/pdf.py
index 1540103..43ca26f 100644
--- a/pdf.py
+++ b/pdf.py
@@ -1,6 +1,6 @@
 import os
-from llama_index import StorageContext, VectorStoreIndex, load_index_from_storage
-from llama_index.readers import PDFReader
+from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
+from llama_index.readers.file import PDFReader
 
 
 def get_index(data, index_name):
diff --git a/prompts.py b/prompts.py
index 1533419..69391a9 100644
--- a/prompts.py
+++ b/prompts.py
@@ -1,4 +1,4 @@
-from llama_index import PromptTemplate
+from llama_index.core import PromptTemplate
 
 
 instruction_str = """\
diff --git a/requirements.txt b/requirements.txt
index a6f1ecc..1f41a7b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,52 +1,5 @@
-aiohttp==3.9.3
-aiosignal==1.3.1
-annotated-types==0.6.0
-anyio==4.2.0
-attrs==23.2.0
-certifi==2024.2.2
-charset-normalizer==3.3.2
-click==8.1.7
-colorama==0.4.6
-dataclasses-json==0.6.4
-Deprecated==1.2.14
-dirtyjson==1.0.8
-distro==1.9.0
-frozenlist==1.4.1
-fsspec==2024.2.0
-greenlet==3.0.3
-h11==0.14.0
-httpcore==1.0.2
-httpx==0.26.0
-idna==3.6
-joblib==1.3.2
-llama-index==0.9.48
-marshmallow==3.20.2
-multidict==6.0.5
-mypy-extensions==1.0.0
-nest-asyncio==1.6.0
-networkx==3.2.1
-nltk==3.8.1
-numpy==1.26.4
-openai==1.12.0
-packaging==23.2
-pandas==2.2.0
-pydantic==2.6.1
-pydantic_core==2.16.2
-pypdf==4.0.1
-python-dateutil==2.8.2
+llama-index==0.12.22
+llama-index-experimental==0.5.4
+pypdf==5.3.1
 python-dotenv==1.0.1
-pytz==2024.1
-regex==2023.12.25
-requests==2.31.0
-six==1.16.0
-sniffio==1.3.0
-SQLAlchemy==2.0.27
-tenacity==8.2.3
-tiktoken==0.6.0
-tqdm==4.66.2
-typing-inspect==0.9.0
-typing_extensions==4.9.0
-tzdata==2024.1
-urllib3==2.2.0
-wrapt==1.16.0
-yarl==1.9.4
+pandas==2.2.3
\ No newline at end of file

From 1a9c8599489b59061ff1656aab95753186ee63eb Mon Sep 17 00:00:00 2001
From: Mahmoud BIDRY <mahmoud.bidry11@gmail.com>
Date: Sun, 2 Mar 2025 23:36:46 +0000
Subject: [PATCH 3/9] Update README.md

---
 README.md | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 90 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 90b4266..0db59b2 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,95 @@
 # PythonAgentAI
 
-# 💻 Launch Your Software Development Career Today!  
+PythonAgentAI is a project designed to leverage advanced OpenAI models, including `gpt-4o-mini` with a 128k context window, to perform various AI-driven tasks. This project utilizes the `llama-index` library (version 0.12.22) and its experimental extensions to enhance functionality.
 
-🎓 **No degree? No problem!** My program equips you with everything you need to break into tech and land an entry-level software development role.  
+## Features
 
-🚀 **Why Join?**  
-- 💼 **$70k+ starting salary potential**  
-- 🕐 **Self-paced:** Complete on your own time  
-- 🤑 **Affordable:** Low risk compared to expensive bootcamps or degrees
-- 🎯 **45,000+ job openings** in the market  
+- Integration with OpenAI's `gpt-4o-mini` model, supporting a 128k context window.
+- Modular code structure with components like `main.py`, `pdf.py`, `prompts.py`, and `note_engine.py`.
+- Utilization of `llama-index` and `llama-index-experimental` libraries for advanced indexing and querying.
 
-👉 **[Start your journey today!](https://techwithtim.net/dev)**  
-No experience needed—just your determination. Future-proof your career and unlock six-figure potential like many of our students have!  
+## Installation
+
+### Prerequisites
+
+- Python 3.11 installed on your system. You can download it from the [official Python website](https://www.python.org/downloads/).
+
+## Configuration
+
+1. **Create the `.env` File**
+
+   The project requires a `.env` file for storing environment variables, including the OpenAI API key. You can create it by running:
+
+   ```bash
+   cp .env.example .env
+   ```
+
+2. **Set Your OpenAI API Key**
+
+   Open the `.env` file in a text editor and configure your OpenAI API key:
+
+   ```
+   OPENAI_API_KEY=your_openai_api_key_here
+   ```
+
+   Replace `your_openai_api_key_here` with your actual API key from OpenAI.
+
+### Setup Instructions
+
+1. **Clone the Repository**
+
+   Open your terminal and run:
+
+   ```bash
+   git clone https://github.com/your-username/PythonAgentAI.git
+   cd PythonAgentAI
+   ```
+
+2. **Create a Virtual Environment**
+
+   It's recommended to use a virtual environment to manage dependencies. To create one using Python's `venv` module:
+
+   ```bash
+   python3.11 -m venv env
+   ```
+
+3. **Activate the Virtual Environment**
+
+   - On **Unix or macOS**:
+
+     ```bash
+     source env/bin/activate
+     ```
+
+   - On **Windows**:
+
+     ```bash
+     .\env\Scripts\activate
+     ```
+
+4. **Install Dependencies**
+
+   With the virtual environment activated, install the required packages:
+
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+   This will install the following essential packages:
+
+   - `llama-index==0.12.22`
+   - `llama-index-experimental==0.5.4`
+   - `pypdf==5.3.1`
+   - `python-dotenv==1.0.1`
+   - `pandas==2.2.3`
+
+## Usage
+
+After setting up the environment and installing the dependencies, you can run the main script:
+
+```bash
+python main.py
+```
+
+
+Ensure that you have configured any necessary environment variables or settings required by the script. Refer to the `prompts.py` and `note_engine.py` files for customizable parameters and functionalities.

From 7c09d75239b0aa536a36bb7bfe7042ebddac7da5 Mon Sep 17 00:00:00 2001
From: Scraper0024 <gaobaohulu@outlook.com>
Date: Mon, 21 Apr 2025 14:39:09 +0800
Subject: [PATCH 4/9] Update main.py

---
 main.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/main.py b/main.py
index 4a274aa..7a71fdf 100644
--- a/main.py
+++ b/main.py
@@ -2,13 +2,14 @@
 import os
 import pandas as pd
 from llama_index.experimental.query_engine import PandasQueryEngine
-from prompts import new_prompt, instruction_str, context
+from prompts import new_prompt, instruction_str, coffee_context, context
 from note_engine import note_engine
 from llama_index.core.tools import QueryEngineTool, ToolMetadata
 from llama_index.core.agent import ReActAgent
 from llama_index.llms.openai import OpenAI
 
 from pdf import canada_engine
+from coffee_scraper import coffee_scraper
 
 load_dotenv()
 
@@ -18,6 +19,9 @@
 population_query_engine = PandasQueryEngine(df=population_df, verbose=True, instruction_str=instruction_str)
 population_query_engine.update_prompts({"pandas_prompt": new_prompt})
 
+coffee_scraper.scrape_nearby_coffee()
+coffee_query_engine = coffee_scraper.get_query_engine()
+
 tools = [
     note_engine,
     QueryEngineTool(
@@ -34,11 +38,21 @@
             description="this gives detailed information about canada the country",
         ),
     ),
+    QueryEngineTool(
+        query_engine=coffee_query_engine,
+        metadata=ToolMetadata(
+            name="ottawa_coffee",
+            description=(
+                "Coffee shops near National Gallery of Canada. "
+                "Includes price levels (1-3), ratings (1-5) and distances."
+            )
+        )
+    )
 ]
 
 # llm = OpenAI(model="gpt-3.5-turbo")
 llm = OpenAI(model="gpt-4o-mini-2024-07-18")
-agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context)
+agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context + "\n" + coffee_context)
 
 while (prompt := input("Enter a prompt (q to quit): ")) != "q":
     result = agent.query(prompt)

From 6baf69b502e74ab375b2581ffbc3fe1b046ccf7d Mon Sep 17 00:00:00 2001
From: Scraper0024 <gaobaohulu@outlook.com>
Date: Mon, 21 Apr 2025 14:44:35 +0800
Subject: [PATCH 5/9] Update prompts.py

---
 prompts.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/prompts.py b/prompts.py
index 69391a9..2717219 100644
--- a/prompts.py
+++ b/prompts.py
@@ -24,3 +24,20 @@
 
 context = """Purpose: The primary role of this agent is to assist users by providing accurate 
             information about world population statistics and details about a country. """
+coffee_context = """
+    ## Coffee Shop Data Usage Guide
+    - Data Range: Surrounding the National Gallery of Canada, within a 2 km radius.
+    - Field Descriptions:
+      - price_level: 
+        - $: Budget-friendly options, typically low-cost.
+        - $$: Mid-range pricing.
+        - $1-10: Price range between $1 and $10, for more specific pricing.
+        - $20-30: Price range between $20 and $30, for higher-end options.
+        - [Variable range like $15-20]: Represents a price range with variable amounts, indicating a specific price range.
+      - rating: Google rating 
+      - distance_km: Distance from the gallery in kilometers.
+    - Example Queries:
+      - 'List coffee shops with rating > 4 within 1km'
+      - 'What is the average price level of shops within 0.5km?'
+      - 'Find the closest 3 coffee shops'
+"""

From 654b95474686bda059bf94ee56b087a67c3fc8bf Mon Sep 17 00:00:00 2001
From: Scraper0024 <gaobaohulu@outlook.com>
Date: Mon, 21 Apr 2025 14:45:38 +0800
Subject: [PATCH 6/9] Update .env.example

---
 .env.example | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.env.example b/.env.example
index e570b8b..1c79fef 100644
--- a/.env.example
+++ b/.env.example
@@ -1 +1,2 @@
-OPENAI_API_KEY=
+SCRAPELESS_API_TOKEN="your_api_token_here"
+OPENAI_API_KEY="your_openai_key"

From 60b5d2fce2d0e8d998da35610fb59aeb1e2fe438 Mon Sep 17 00:00:00 2001
From: Scraper0024 <gaobaohulu@outlook.com>
Date: Mon, 21 Apr 2025 14:51:05 +0800
Subject: [PATCH 7/9] Create coffee_scraper.py

Add a test example
---
 coffee_scraper.py | 120 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 coffee_scraper.py

diff --git a/coffee_scraper.py b/coffee_scraper.py
new file mode 100644
index 0000000..df65d46
--- /dev/null
+++ b/coffee_scraper.py
@@ -0,0 +1,120 @@
+"""Scrape coffee shops near the National Gallery of Canada via Scrapeless."""
+
+import os
+import requests
+import pandas as pd
+from geopy.distance import geodesic
+from llama_index.experimental import PandasQueryEngine
+
+
+class CoffeeScraper:
+    """Collect Google Maps coffee-shop results into a queryable DataFrame."""
+
+    # (connect, read) timeouts in seconds so a stalled request cannot hang forever.
+    REQUEST_TIMEOUT = (10, 120)
+
+    def __init__(self):
+        # May be None when this module is imported before load_dotenv() runs (as
+        # main.py does); the token is looked up again when a request is sent.
+        self.api_token = os.getenv("SCRAPELESS_API_TOKEN")
+        self.gallery_coords = (45.4299, -75.6939)  # National Gallery of Canada location
+        self.df = pd.DataFrame(columns=[
+            "name", "address", "price_level",
+            "rating", "distance_km", "scraped_at"
+        ])
+
+    def _send_scrapeless_request(self, query: str):
+        """Run a Google Maps search through Scrapeless and return ``local_results``.
+
+        Raises:
+            RuntimeError: if no Scrapeless API token is configured.
+            Exception: if the API responds with a non-200 status.
+        """
+        # Resolve the token lazily so a value loaded from .env after import is used.
+        token = self.api_token or os.getenv("SCRAPELESS_API_TOKEN")
+        if not token:
+            raise RuntimeError("SCRAPELESS_API_TOKEN is not set")
+        self.api_token = token
+
+        host = "api.scrapeless.com"
+        url = f"https://{host}/api/v1/scraper/request"
+
+        payload = {
+            "actor": "scraper.google.maps",
+            "input": {
+                "q": query,
+                "engine": "google_maps",
+                "type": "search",
+                "ll": "@45.4299,-75.6939,14z",
+                "google_domain": "google.com",
+                "gl": "ca",
+                "hl": "en-sg",
+                "data": "",
+                "place_id": "",
+                "start": ""
+            }
+        }
+
+        response = requests.post(
+            url,
+            headers={"x-api-token": token},
+            json=payload,
+            timeout=self.REQUEST_TIMEOUT
+        )
+
+        if response.status_code == 200:
+            res_data = response.json()
+            print(f"Scraping status: {res_data}")
+            return res_data["local_results"]
+        else:
+            raise Exception(f"Scraping failed: {response.text}")
+
+    def scrape_nearby_coffee(self):
+        """Fetch nearby coffee shops and append them to ``self.df``.
+
+        Best-effort: any failure is printed rather than raised, and rows parsed
+        before the failure are still kept.
+        """
+        rows = []
+        try:
+            results = self._send_scrapeless_request("coffee")
+
+            for place in results:
+                shop_coords = (place["gps_coordinates"]["latitude"], place["gps_coordinates"]["longitude"])
+                distance_km = round(geodesic(self.gallery_coords, shop_coords).km, 2)
+                print(f"shop_coords: {shop_coords}")
+                print(f"distance: {distance_km}")
+                rows.append({
+                    "name": place["title"],
+                    "address": place["address"],
+                    # Length of the price string ("$" -> 1); `or ""` guards a null price.
+                    # NOTE(review): range strings such as "$1-10" give values above 3 - confirm intent.
+                    "price_level": len(place.get("price") or ""),
+                    "rating": place.get("rating"),
+                    "distance_km": distance_km,
+                    "scraped_at": pd.Timestamp.now()
+                })
+
+            print(f"Successfully scraped data for {len(results)} coffee shops.")
+        except Exception as e:
+            print(f"The scraping failed.: {str(e)}")
+        finally:
+            if rows:
+                # One concat for the whole batch instead of one per shop.
+                self.df = pd.concat([self.df, pd.DataFrame(rows)], ignore_index=True)
+
+    def get_query_engine(self):
+        """Return a PandasQueryEngine over the scraped coffee-shop DataFrame."""
+        return PandasQueryEngine(
+            df=self.df,
+            verbose=True,
+            instruction_str=(
+                "You are a coffee shop analyst. Use this data to answer questions "
+                "about coffee shops near National Gallery of Canada. "
+                "Key fields: name, price_level ($1-3), rating (1-5), distance_km."
+            )
+        )
+
+
+# Shared instance imported by main.py.
+coffee_scraper = CoffeeScraper()

From ba92b7417248d32c6c922cd2a2545a9c0bb2a69c Mon Sep 17 00:00:00 2001
From: Scraper0024 <gaobaohulu@outlook.com>
Date: Mon, 21 Apr 2025 15:04:29 +0800
Subject: [PATCH 8/9] Update README.md

Complete usage steps
---
 README.md | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 0db59b2..bd47474 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,18 @@
 # PythonAgentAI
 
-PythonAgentAI is a project designed to leverage advanced OpenAI models, including `gpt-4o-mini` with a 128k context window, to perform various AI-driven tasks. This project utilizes the `llama-index` library (version 0.12.22) and its experimental extensions to enhance functionality.
+The **PythonAgentAI** project aims to leverage advanced OpenAI models (including `gpt-4o-mini` with a 128k context window) to perform a variety of AI-powered tasks. This project integrates **Scrapeless** for Google Maps and the **llama-index** library (version 0.12.22) along with its experimental extensions to enable large language models to provide real-time responses.
 
 ## Features
 
-- Integration with OpenAI's `gpt-4o-mini` model, supporting a 128k context window.
-- Modular code structure with components like `main.py`, `pdf.py`, `prompts.py`, and `note_engine.py`.
-- Utilization of `llama-index` and `llama-index-experimental` libraries for advanced indexing and querying.
+- Integration with OpenAI’s `gpt-4o-mini` model, supporting a 128k context window.
+- Modular code structure including components like `main.py`, `pdf.py`, `prompts.py`, `note_engine.py`, and `coffee_scraper.py`.
+- Advanced indexing and querying via `llama-index` and `llama-index-experimental` libraries.
 
 ## Installation
 
 ### Prerequisites
 
-- Python 3.11 installed on your system. You can download it from the [official Python website](https://www.python.org/downloads/).
+- Python 3.11 must be installed on your system. You can download it from the [official Python website](https://www.python.org/downloads/).
 
 ## Configuration
 
@@ -34,6 +34,10 @@ PythonAgentAI is a project designed to leverage advanced OpenAI models, includin
 
    Replace `your_openai_api_key_here` with your actual API key from OpenAI.
 
+- **Log in to [Scrapeless](https://app.scrapeless.com/passport/login?utm_source=github&utm_medium=readme&utm_campaign=twt), obtain your API token, and set it as `SCRAPELESS_API_TOKEN` in the `.env` file.**
+
+![Get the Scrapeless API key](https://assets.scrapeless.com/prod/posts/naver-product/77c0cef86a29013173eb41a34f42d3f4.png)
+
 ### Setup Instructions
 
 1. **Clone the Repository**
@@ -93,3 +97,13 @@ python main.py
 
 
 Ensure that you have configured any necessary environment variables or settings required by the script. Refer to the `prompts.py` and `note_engine.py` files for customizable parameters and functionalities.
+
+**Enter a prompt when the script asks for one (type `q` to quit).** After a short wait, you’ll see output similar to the images below for these example prompts:
+
+- **Find the highest rated coffee shop within 0.5km**
+
+![Result of the highest rated coffee shop within 0.5km](https://assets.scrapeless.com/prod/posts/deep-serp-api-online/4ea1b12e422967bccd0db82282cb0270.png)
+ 
+- **Find the closest coffee shop to the target location**
+
+![Result of the closest coffee shop to the location](https://assets.scrapeless.com/prod/posts/deep-serp-api-online/d7e32f4d01913dbd7b76e15983ce46e2.png)

From eace7c13f344e54debfdd1ef8da9f30675455375 Mon Sep 17 00:00:00 2001
From: Scraper0024 <gaobaohulu@outlook.com>
Date: Mon, 21 Apr 2025 15:08:12 +0800
Subject: [PATCH 9/9] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index bd47474..b3c61b4 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # PythonAgentAI
 
-The **PythonAgentAI** project aims to leverage advanced OpenAI models (including `gpt-4o-mini` with a 128k context window) to perform a variety of AI-powered tasks. This project integrates **Scrapeless** for Google Maps and the **llama-index** library (version 0.12.22) along with its experimental extensions to enable large language models to provide real-time responses.
+The **PythonAgentAI** project aims to leverage advanced OpenAI models (including `gpt-4o-mini` with a 128k context window) to perform a variety of AI-powered tasks. This project integrates [**Scrapeless**](https://www.scrapeless.com/en?utm_source=github&utm_medium=readme&utm_campaign=twt) for [Google Maps](https://www.scrapeless.com/en/product/deep-serp-api?utm_source=github&utm_medium=readme&utm_campaign=twt) and the **llama-index** library (version 0.12.22) along with its experimental extensions to enable large language models to provide real-time responses.
 
 ## Features