From d80c4470853e710f5b076a3b3f83ac8d2b293ed6 Mon Sep 17 00:00:00 2001 From: Tim Ruscica <35348871+techwithtim@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:31:56 +0400 Subject: [PATCH 1/9] Updated README via script --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a4fcaf..90b4266 100644 --- a/README.md +++ b/README.md @@ -1 +1,14 @@ -# PythonAgentAI \ No newline at end of file +# PythonAgentAI + +# 💻 Launch Your Software Development Career Today! + +🎓 **No degree? No problem!** My program equips you with everything you need to break into tech and land an entry-level software development role. + +🚀 **Why Join?** +- 💼 **$70k+ starting salary potential** +- 🕐 **Self-paced:** Complete on your own time +- 🤑 **Affordable:** Low risk compared to expensive bootcamps or degrees +- 🎯 **45,000+ job openings** in the market + +👉 **[Start your journey today!](https://techwithtim.net/dev)** +No experience needed—just your determination. Future-proof your career and unlock six-figure potential like many of our students have! 
From 55f7321bc19d7e4e25685a4b10c33a77b94ab3b2 Mon Sep 17 00:00:00 2001 From: mahmoud Date: Sun, 2 Mar 2025 23:02:22 +0000 Subject: [PATCH 2/9] Enhance the agent by upgrading LlamaIndex to support all OpenAI models, including GPT-4o mini --- main.py | 16 +++++++------- note_engine.py | 2 +- pdf.py | 4 ++-- prompts.py | 2 +- requirements.txt | 55 ++++-------------------------------------------- 5 files changed, 16 insertions(+), 63 deletions(-) diff --git a/main.py b/main.py index 72b53f0..4a274aa 100644 --- a/main.py +++ b/main.py @@ -1,12 +1,13 @@ from dotenv import load_dotenv import os import pandas as pd -from llama_index.query_engine import PandasQueryEngine +from llama_index.experimental.query_engine import PandasQueryEngine from prompts import new_prompt, instruction_str, context from note_engine import note_engine -from llama_index.tools import QueryEngineTool, ToolMetadata -from llama_index.agent import ReActAgent -from llama_index.llms import OpenAI +from llama_index.core.tools import QueryEngineTool, ToolMetadata +from llama_index.core.agent import ReActAgent +from llama_index.llms.openai import OpenAI + from pdf import canada_engine load_dotenv() @@ -14,9 +15,7 @@ population_path = os.path.join("data", "population.csv") population_df = pd.read_csv(population_path) -population_query_engine = PandasQueryEngine( - df=population_df, verbose=True, instruction_str=instruction_str -) +population_query_engine = PandasQueryEngine(df=population_df, verbose=True, instruction_str=instruction_str) population_query_engine.update_prompts({"pandas_prompt": new_prompt}) tools = [ @@ -37,7 +36,8 @@ ), ] -llm = OpenAI(model="gpt-3.5-turbo-0613") +# llm = OpenAI(model="gpt-3.5-turbo") +llm = OpenAI(model="gpt-4o-mini-2024-07-18") agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context) while (prompt := input("Enter a prompt (q to quit): ")) != "q": diff --git a/note_engine.py b/note_engine.py index 577cc2f..6f925fa 100644 --- a/note_engine.py +++ 
b/note_engine.py @@ -1,4 +1,4 @@ -from llama_index.tools import FunctionTool +from llama_index.core.tools import FunctionTool import os note_file = os.path.join("data", "notes.txt") diff --git a/pdf.py b/pdf.py index 1540103..43ca26f 100644 --- a/pdf.py +++ b/pdf.py @@ -1,6 +1,6 @@ import os -from llama_index import StorageContext, VectorStoreIndex, load_index_from_storage -from llama_index.readers import PDFReader +from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage +from llama_index.readers.file import PDFReader def get_index(data, index_name): diff --git a/prompts.py b/prompts.py index 1533419..69391a9 100644 --- a/prompts.py +++ b/prompts.py @@ -1,4 +1,4 @@ -from llama_index import PromptTemplate +from llama_index.core import PromptTemplate instruction_str = """\ diff --git a/requirements.txt b/requirements.txt index a6f1ecc..1f41a7b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,52 +1,5 @@ -aiohttp==3.9.3 -aiosignal==1.3.1 -annotated-types==0.6.0 -anyio==4.2.0 -attrs==23.2.0 -certifi==2024.2.2 -charset-normalizer==3.3.2 -click==8.1.7 -colorama==0.4.6 -dataclasses-json==0.6.4 -Deprecated==1.2.14 -dirtyjson==1.0.8 -distro==1.9.0 -frozenlist==1.4.1 -fsspec==2024.2.0 -greenlet==3.0.3 -h11==0.14.0 -httpcore==1.0.2 -httpx==0.26.0 -idna==3.6 -joblib==1.3.2 -llama-index==0.9.48 -marshmallow==3.20.2 -multidict==6.0.5 -mypy-extensions==1.0.0 -nest-asyncio==1.6.0 -networkx==3.2.1 -nltk==3.8.1 -numpy==1.26.4 -openai==1.12.0 -packaging==23.2 -pandas==2.2.0 -pydantic==2.6.1 -pydantic_core==2.16.2 -pypdf==4.0.1 -python-dateutil==2.8.2 +llama-index==0.12.22 +llama-index-experimental==0.5.4 +pypdf==5.3.1 python-dotenv==1.0.1 -pytz==2024.1 -regex==2023.12.25 -requests==2.31.0 -six==1.16.0 -sniffio==1.3.0 -SQLAlchemy==2.0.27 -tenacity==8.2.3 -tiktoken==0.6.0 -tqdm==4.66.2 -typing-inspect==0.9.0 -typing_extensions==4.9.0 -tzdata==2024.1 -urllib3==2.2.0 -wrapt==1.16.0 -yarl==1.9.4 +pandas==2.2.3 \ No newline at end of file From 
1a9c8599489b59061ff1656aab95753186ee63eb Mon Sep 17 00:00:00 2001 From: Mahmoud BIDRY Date: Sun, 2 Mar 2025 23:36:46 +0000 Subject: [PATCH 3/9] Update README.md --- README.md | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 90 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 90b4266..0db59b2 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,95 @@ # PythonAgentAI -# 💻 Launch Your Software Development Career Today! +PythonAgentAI is a project designed to leverage advanced OpenAI models, including `gpt-4o-mini` with a 128k context window, to perform various AI-driven tasks. This project utilizes the `llama-index` library (version 0.12.22) and its experimental extensions to enhance functionality. -🎓 **No degree? No problem!** My program equips you with everything you need to break into tech and land an entry-level software development role. +## Features -🚀 **Why Join?** -- 💼 **$70k+ starting salary potential** -- 🕐 **Self-paced:** Complete on your own time -- 🤑 **Affordable:** Low risk compared to expensive bootcamps or degrees -- 🎯 **45,000+ job openings** in the market +- Integration with OpenAI's `gpt-4o-mini` model, supporting a 128k context window. +- Modular code structure with components like `main.py`, `pdf.py`, `prompts.py`, and `note_engine.py`. +- Utilization of `llama-index` and `llama-index-experimental` libraries for advanced indexing and querying. -👉 **[Start your journey today!](https://techwithtim.net/dev)** -No experience needed—just your determination. Future-proof your career and unlock six-figure potential like many of our students have! +## Installation + +### Prerequisites + +- Python 3.11 installed on your system. You can download it from the [official Python website](https://www.python.org/downloads/). + +## Configuration + +1. **Create the `.env` File** + + The project requires a `.env` file for storing environment variables, including the OpenAI API key. 
You can create it by running: + + ```bash + cp .env.example .env + ``` + +2. **Set Your OpenAI API Key** + + Open the `.env` file in a text editor and configure your OpenAI API key: + + ``` + OPENAI_API_KEY=your_openai_api_key_here + ``` + + Replace `your_openai_api_key_here` with your actual API key from OpenAI. + +### Setup Instructions + +1. **Clone the Repository** + + Open your terminal and run: + + ```bash + git clone https://github.com/your-username/PythonAgentAI.git + cd PythonAgentAI + ``` + +2. **Create a Virtual Environment** + + It's recommended to use a virtual environment to manage dependencies. To create one using Python's `venv` module: + + ```bash + python3.11 -m venv env + ``` + +3. **Activate the Virtual Environment** + + - On **Unix or macOS**: + + ```bash + source env/bin/activate + ``` + + - On **Windows**: + + ```bash + .\env\Scripts\activate + ``` + +4. **Install Dependencies** + + With the virtual environment activated, install the required packages: + + ```bash + pip install -r requirements.txt + ``` + + This will install the following essential packages: + + - `llama-index==0.12.22` + - `llama-index-experimental==0.5.4` + - `pypdf==5.3.1` + - `python-dotenv==1.0.1` + - `pandas==2.2.3` + +## Usage + +After setting up the environment and installing the dependencies, you can run the main script: + +```bash +python main.py +``` + + +Ensure that you have configured any necessary environment variables or settings required by the script. Refer to the `prompts.py` and `note_engine.py` files for customizable parameters and functionalities. 
From 7c09d75239b0aa536a36bb7bfe7042ebddac7da5 Mon Sep 17 00:00:00 2001 From: Scraper0024 Date: Mon, 21 Apr 2025 14:39:09 +0800 Subject: [PATCH 4/9] Update main.py --- main.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 4a274aa..7a71fdf 100644 --- a/main.py +++ b/main.py @@ -2,13 +2,14 @@ import os import pandas as pd from llama_index.experimental.query_engine import PandasQueryEngine -from prompts import new_prompt, instruction_str, context +from prompts import new_prompt, instruction_str, coffee_context, context from note_engine import note_engine from llama_index.core.tools import QueryEngineTool, ToolMetadata from llama_index.core.agent import ReActAgent from llama_index.llms.openai import OpenAI from pdf import canada_engine +from coffee_scraper import coffee_scraper load_dotenv() @@ -18,6 +19,9 @@ population_query_engine = PandasQueryEngine(df=population_df, verbose=True, instruction_str=instruction_str) population_query_engine.update_prompts({"pandas_prompt": new_prompt}) +coffee_scraper.scrape_nearby_coffee() +coffee_query_engine = coffee_scraper.get_query_engine() + tools = [ note_engine, QueryEngineTool( @@ -34,11 +38,21 @@ description="this gives detailed information about canada the country", ), ), + QueryEngineTool( + query_engine=coffee_query_engine, + metadata=ToolMetadata( + name="ottawa_coffee", + description=( + "Coffee shops near National Gallery of Canada. " + "Includes price levels (1-3), ratings (1-5) and distances." 
+ ) + ) + ) ] # llm = OpenAI(model="gpt-3.5-turbo") llm = OpenAI(model="gpt-4o-mini-2024-07-18") -agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context) +agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context + "\n" + coffee_context) while (prompt := input("Enter a prompt (q to quit): ")) != "q": result = agent.query(prompt) From 6baf69b502e74ab375b2581ffbc3fe1b046ccf7d Mon Sep 17 00:00:00 2001 From: Scraper0024 Date: Mon, 21 Apr 2025 14:44:35 +0800 Subject: [PATCH 5/9] Update prompts.py --- prompts.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/prompts.py b/prompts.py index 69391a9..2717219 100644 --- a/prompts.py +++ b/prompts.py @@ -24,3 +24,20 @@ context = """Purpose: The primary role of this agent is to assist users by providing accurate information about world population statistics and details about a country. """ +coffee_context = """ + ## Coffee Shop Data Usage Guide + - Data Range: Surrounding the National Gallery of Canada, within a 2 km radius. + - Field Descriptions: + - price_level: + - $: Budget-friendly options, typically low-cost. + - $$: Mid-range pricing. + - $1-10: Price range between $1 and $10, for more specific pricing. + - $20-30: Price range between $20 and $30, for higher-end options. + - [Variable range like $15-20]: Represents a price range with variable amounts, indicating a specific price range. + - rating: Google rating + - distance_km: Distance from the gallery in kilometers. + - Example Queries:: + - 'List coffee shops with rating > 4 within 1km' + - 'What is the average price level of shops within 0.5km?' 
+ - 'Find the closest 3 coffee shops' +""" From 654b95474686bda059bf94ee56b087a67c3fc8bf Mon Sep 17 00:00:00 2001 From: Scraper0024 Date: Mon, 21 Apr 2025 14:45:38 +0800 Subject: [PATCH 6/9] Update .env.example --- .env.example | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index e570b8b..1c79fef 100644 --- a/.env.example +++ b/.env.example @@ -1 +1,2 @@ -OPENAI_API_KEY= +SCRAPELESS_API_TOKEN="your_api_token_here" +OPENAI_API_KEY="your_openai_key" From 60b5d2fce2d0e8d998da35610fb59aeb1e2fe438 Mon Sep 17 00:00:00 2001 From: Scraper0024 Date: Mon, 21 Apr 2025 14:51:05 +0800 Subject: [PATCH 7/9] Create coffee_scraper.py Add a test example --- coffee_scraper.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 coffee_scraper.py diff --git a/coffee_scraper.py b/coffee_scraper.py new file mode 100644 index 0000000..df65d46 --- /dev/null +++ b/coffee_scraper.py @@ -0,0 +1,83 @@ +import os +import requests +import pandas as pd +from geopy.distance import geodesic +from llama_index.experimental import PandasQueryEngine + + +class CoffeeScraper: + def __init__(self): + self.api_token = os.getenv("SCRAPELESS_API_TOKEN") + self.gallery_coords = (45.4299, -75.6939) # National Gallery of Canada location + self.df = pd.DataFrame(columns=[ + "name", "address", "price_level", + "rating", "distance_km", "scraped_at" + ]) + + def _send_scrapeless_request(self, query: str): + host = "api.scrapeless.com" + url = f"https://{host}/api/v1/scraper/request" + + payload = { + "actor": "scraper.google.maps", + "input": { + "q": query, + "engine": "google_maps", + "type": "search", + "ll": "@45.4299,-75.6939,14z", + "google_domain": "google.com", + "gl": "ca", + "hl": "en-sg", + "data": "", + "place_id": "", + "start": "" + } + } + + response = requests.post( + url, + headers={"x-api-token": self.api_token}, + json=payload + ) + + if response.status_code == 200: + res_data = response.json() 
+ print(f"Scraping status: {res_data}") + return res_data["local_results"] + else: + raise Exception(f"Scraping failed: {response.text}") + + def scrape_nearby_coffee(self): + try: + results = self._send_scrapeless_request("coffee") + + for place in results: + shop_coords = (place["gps_coordinates"]["latitude"], place["gps_coordinates"]["longitude"]) + print(f"shop_coords: {shop_coords}") + print(f"distance: {round(geodesic(self.gallery_coords, shop_coords).km, 2)}") + self.df = pd.concat([self.df, pd.DataFrame([{ + "name": place["title"], + "address": place["address"], + "price_level": len(place.get("price", "")), + "rating": place.get("rating", None), + "distance_km": round(geodesic(self.gallery_coords, shop_coords).km, 2), + "scraped_at": pd.Timestamp.now() + }])], ignore_index=True) + + print(f"Successfully scraped data for {len(results)} coffee shops.") + except Exception as e: + print(f"The scraping failed.: {str(e)}") + + def get_query_engine(self): + return PandasQueryEngine( + df=self.df, + verbose=True, + instruction_str=( + "You are a coffee shop analyst. Use this data to answer questions " + "about coffee shops near National Gallery of Canada. " + "Key fields: name, price_level ($1-3), rating (1-5), distance_km." + ) + ) + + +coffee_scraper = CoffeeScraper() From ba92b7417248d32c6c922cd2a2545a9c0bb2a69c Mon Sep 17 00:00:00 2001 From: Scraper0024 Date: Mon, 21 Apr 2025 15:04:29 +0800 Subject: [PATCH 8/9] Update README.md Complete usage steps --- README.md | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0db59b2..bd47474 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,18 @@ # PythonAgentAI -PythonAgentAI is a project designed to leverage advanced OpenAI models, including `gpt-4o-mini` with a 128k context window, to perform various AI-driven tasks. This project utilizes the `llama-index` library (version 0.12.22) and its experimental extensions to enhance functionality. 
+The **PythonAgentAI** project aims to leverage advanced OpenAI models (including `gpt-4o-mini` with a 128k context window) to perform a variety of AI-powered tasks. This project integrates **Scrapeless** for Google Maps and the **llama-index** library (version 0.12.22) along with its experimental extensions to enable large language models to provide real-time responses. ## Features -- Integration with OpenAI's `gpt-4o-mini` model, supporting a 128k context window. -- Modular code structure with components like `main.py`, `pdf.py`, `prompts.py`, and `note_engine.py`. -- Utilization of `llama-index` and `llama-index-experimental` libraries for advanced indexing and querying. +- Integration with OpenAI’s `gpt-4o-mini` model, supporting a 128k context window. +- Modular code structure including components like `main.py`, `pdf.py`, `prompts.py`, and `note_engine.py`. +- Advanced indexing and querying via `llama-index` and `llama-index-experimental` libraries. ## Installation ### Prerequisites -- Python 3.11 installed on your system. You can download it from the [official Python website](https://www.python.org/downloads/). +- Python 3.11 must be installed on your system. You can download it from the [official Python website](https://www.python.org/downloads/). ## Configuration @@ -34,6 +34,10 @@ PythonAgentAI is a project designed to leverage advanced OpenAI models, includin Replace `your_openai_api_key_here` with your actual API key from OpenAI. +- **Log in to [Scrapeless](https://app.scrapeless.com/passport/login?utm_source=github&utm_medium=readme&utm_campaign=twt) and obtain your API token.** + +![Get the Scrapeless API key](https://assets.scrapeless.com/prod/posts/naver-product/77c0cef86a29013173eb41a34f42d3f4.png) + ### Setup Instructions 1. **Clone the Repository** @@ -93,3 +97,13 @@ python main.py Ensure that you have configured any necessary environment variables or settings required by the script. 
Refer to the `prompts.py` and `note_engine.py` files for customizable parameters and functionalities. + +2. **Input the provided prompts to receive results**. After a short wait, you’ll see output similar to the images below: + +- **Find the highest rated coffee shop within 0.5km** + +![Result of the highest rated coffee shop within 0.5km](https://assets.scrapeless.com/prod/posts/deep-serp-api-online/4ea1b12e422967bccd0db82282cb0270.png) + +- **Find the closest coffee shop to the target location** + +![Result of the closest coffee shop to the location](https://assets.scrapeless.com/prod/posts/deep-serp-api-online/d7e32f4d01913dbd7b76e15983ce46e2.png) From eace7c13f344e54debfdd1ef8da9f30675455375 Mon Sep 17 00:00:00 2001 From: Scraper0024 Date: Mon, 21 Apr 2025 15:08:12 +0800 Subject: [PATCH 9/9] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bd47474..b3c61b4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # PythonAgentAI -The **PythonAgentAI** project aims to leverage advanced OpenAI models (including `gpt-4o-mini` with a 128k context window) to perform a variety of AI-powered tasks. This project integrates **Scrapeless** for Google Maps and the **llama-index** library (version 0.12.22) along with its experimental extensions to enable large language models to provide real-time responses. +The **PythonAgentAI** project aims to leverage advanced OpenAI models (including `gpt-4o-mini` with a 128k context window) to perform a variety of AI-powered tasks. This project integrates [**Scrapeless**](https://www.scrapeless.com/en?utm_source=github&utm_medium=readme&utm_campaign=twt) for [Google Maps](https://www.scrapeless.com/en/product/deep-serp-api?utm_source=github&utm_medium=readme&utm_campaign=twt) and the **llama-index** library (version 0.12.22) along with its experimental extensions to enable large language models to provide real-time responses. ## Features