diff --git a/chatbot/chatbot.py b/chatbot/chatbot.py
new file mode 100644
index 0000000..97d3ea1
--- /dev/null
+++ b/chatbot/chatbot.py
@@ -0,0 +1,133 @@
+import uuid
+from typing import Any, Dict, List
+import json
+
+import requests
+import sseclient
+
+import streamlit as st
+
+SYSTEM_PROMPT = "You are a helpful smart assistant. Search the web and answer the correct question with ciations. You have access to the web to answer any question"
+API_BASE_URL = "https://chat-api.you.com"
+# (connect, read) timeouts in seconds; without them a stalled request would hang the app.
+REQUEST_TIMEOUT = (10, 300)
+
+
+def build_prompt(messages=None):
+    """Flatten a chat history into the single query string sent to the API.
+
+    Args:
+        messages: List of {"role", "content"} dicts. Defaults to the history
+            stored in st.session_state.messages.
+
+    Returns:
+        One line per message: the role, a colon and a tab, then the content.
+    """
+    if messages is None:
+        messages = st.session_state.messages
+    return "".join(f'{msg["role"]}:\t{msg["content"]}\n' for msg in messages)
+
+
+ydc_api_key = st.secrets["YDC_API_KEY"]
+
+
+def get_ydc_answer(messages, mode='smart', stream=False):
+    """Request a complete (non-streamed) answer from the You.com chat API.
+
+    Args:
+        messages: Chat history to send, as a list of {"role", "content"} dicts.
+        mode: API mode to call, "smart" or "research".
+        stream: Unused; kept so existing callers keep working. Use
+            get_ydc_stream_answer() for streamed answers.
+
+    Returns:
+        The decoded JSON body of the response.
+
+    Raises:
+        requests.RequestException: On a connection error, a timeout or a
+            non-2xx status code.
+    """
+    headers = {'x-api-key': ydc_api_key, 'Content-Type': 'application/json'}
+    endpoint = f"{API_BASE_URL}/{mode}"
+    params = {"query": build_prompt(messages), "chat_id": st.session_state.chat_id}
+    response = requests.post(endpoint, json=params, headers=headers, timeout=REQUEST_TIMEOUT)
+    # Fail loudly on HTTP errors instead of handing the caller an error payload.
+    response.raise_for_status()
+    return response.json()
+
+
+def get_ydc_stream_answer(mode='smart'):
+    """Stream an answer from the You.com chat API, one token at a time.
+
+    Args:
+        mode: API mode to call, "smart" or "research".
+
+    Yields:
+        The data of every server-sent event whose type is "token".
+
+    Raises:
+        requests.RequestException: On a connection error, a timeout or a
+            non-2xx status code.
+    """
+    headers = {'x-api-key': ydc_api_key}
+    endpoint = f"{API_BASE_URL}/{mode}"
+    params = {"query": build_prompt(), "stream": True}
+    with requests.get(endpoint, headers=headers, params=params, stream=True, timeout=REQUEST_TIMEOUT) as response:
+        response.raise_for_status()
+        # Only the SSE client may read the body: iterating response.iter_lines()
+        # as well would consume the stream and produce raw bytes with no .event.
+        client = sseclient.SSEClient(response)
+        for event in client.events():
+            if event.event == "token":
+                yield str(event.data)
+
+
+def clear_chat_history():
+    """Start a new conversation: fresh chat id plus the two seed messages."""
+    st.session_state.chat_id = str(uuid.uuid4())
+    st.session_state["messages"] = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "assistant", "content": "What can I help you build today?"}
+    ]
+
+
+with st.sidebar:
+    model_select = st.selectbox("Select a model", ["smart", "research"])
+    st.button('Reset Chat', on_click=clear_chat_history)
+
+
+st.title("💬 YOU.COM API ASSISTANT")
+st.caption(""" 🚀 Let us help you build with You.com""")
+
+
+# First run of a session: seed the state exactly like a manual reset does.
+if "messages" not in st.session_state:
+    clear_chat_history()
+
+# Display the conversation so far (the system prompt stays hidden)
+for msg in st.session_state.messages:
+    if msg["role"] != "system":
+        st.chat_message(msg["role"]).write(msg["content"])
+
+# User provided prompt
+if prompt := st.chat_input():
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    st.chat_message("user").write(prompt)
+
+
+# Generate response if last response not from assistant
+if st.session_state.messages[-1]["role"] != "assistant":
+    with st.chat_message("assistant"):
+        try:
+            # The mode must go in by keyword: the first positional argument is the history.
+            full_response = get_ydc_answer(st.session_state.messages, mode=model_select)["answer"]
+        except (requests.RequestException, KeyError) as exc:
+            st.error(f"The You.com API request failed: {exc}")
+            st.stop()
+        st.write(full_response)
+
+        # Streaming alternative:
+        # full_response = st.write_stream(get_ydc_stream_answer(model_select))
+
+    message = {"role": "assistant", "content": full_response}
+    st.session_state.messages.append(message)
diff --git a/chatbot/requirements.txt b/chatbot/requirements.txt
new file mode 100644
index 0000000..e251330
--- /dev/null
+++ b/chatbot/requirements.txt
@@ -0,0 +1,3 @@
+streamlit
+requests
+sseclient-py
diff --git a/legal_assistant_csv_langchain_chatbot.ipynb b/legal_assistant_csv_langchain_chatbot.ipynb index 86a7e86..c3b8aba 100644 --- a/legal_assistant_csv_langchain_chatbot.ipynb +++ b/legal_assistant_csv_langchain_chatbot.ipynb @@ -11,7 +11,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This chatbot retrieves context from a proprietary data source and the web to answer questions about federal laws in the United States of America (USA). The proprietary datasource is a CSV file of all federal laws and their revision history in the USA. 
The web data required to respond to the user's questions is retrieved using the You.com API. The chatbot is implemented as an agent in using LangChain and LangGraph." + "This chatbot retrieves context from a proprietary datasource and the web to answer questions about federal laws in the United States of America (USA). The proprietary datasource is a CSV file of all federal laws and their revision history in the USA. The web data required to respond to the user's questions is retrieved using the You.com API. The chatbot is implemented as an agent in Langchain." ] }, { @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -34,24 +34,23 @@ "! pip install langchain==0.2.1\n", "! pip install langchain_community==0.2.1\n", "! pip install langchain_openai==0.1.7\n", + "! pip install langchain-anthropic\n", "! pip install langchain_text_splitters==0.2.0\n", "! pip install langchain_core==0.2.1\n", "! pip install numpy==1.26.4\n", - "! pip install openai==1.30.3\n", - "! pip install python-dotenv==1.0.1\n", - "! pip install faiss-cpu==1.8.0" + "! 
pip install python-dotenv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Load in the US Federal Laws dataset and create a vector database representation of this dataset, which will then be converted into a LangChain Retriever and Tool" + "## Load in the US Federal Laws dataset and create a vector database representation of this dataset, which will then be converted into a Langchain Retriever and Tool" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -242,7 +241,7 @@ "4 False " ] }, - "execution_count": 25, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -259,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -270,7 +269,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -279,21 +278,24 @@ "True" ] }, - "execution_count": 27, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# The YDC_API_KEY and OPENAI_API_KEY should be defined in a .env file\n", - "# Let's load the API keys in from the .env file\n", - "import dotenv\n", - "dotenv.load_dotenv(\".env\", override=True)" + "#os.environ[\"YDC_API_KEY\"] = \"\"\n", + "#os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "#os.environ[\"ANTHROPIC_API_KEY\"] = \"\"\n", + "\n", + "# Or load from .env file\n", + "from dotenv import load_dotenv\n", + "load_dotenv()" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -307,7 +309,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -324,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -332,17 +334,17 @@ "text/plain": [ "[Document(page_content='row_number: 1885\\naction: An 
Act\\nTitle: In addition to the act, entitled \"An act for the prompt settlement of public accounts,\" and for the punishment of the crime of perjury\\nsal_volume: 3\\nsal_page_start: 770\\nBillCitation: NA\\ncongress_number: 17\\nchapter: 37\\nsession_number: 2\\npl_no: NA\\ndate_of_passage: 1823-03-01\\nsecondary_date: NA\\ndates_conflict: NA\\nSource: HeinOnline\\nURL: NA\\nalternate_sal_volume: NA\\nalternate_sal_page_start: NA\\nhas_alternate_sal_citation: FALSE', metadata={'source': 'us_laws_dataset.csv', 'row': 1884}),\n", " Document(page_content='row_number: 38724\\naction: An Act\\nTitle: An act to permit the use of unsworn declarations under penalty of perjury as evidence in Federal proceedings\\nsal_volume: 90\\nsal_page_start: 2534\\nBillCitation: H.R. 15531\\ncongress_number: 94\\nchapter: NA\\nsession_number: 2\\npl_no: 94-550\\ndate_of_passage: 1976-10-18\\nsecondary_date: NA\\ndates_conflict: FALSE\\nSource: NA\\nURL: https://www.govinfo.gov/content/pkg/STATUTE-90/pdf/STATUTE-90-Pg2534.pdf\\nalternate_sal_volume: 90\\nalternate_sal_page_start: 2534\\nhas_alternate_sal_citation: TRUE', metadata={'source': 'us_laws_dataset.csv', 'row': 38720}),\n", - " Document(page_content='row_number: 33140\\naction: An Act\\nTitle: An Act to amend section 200 of the Soldiers and Sailors Civil Relief Act of 1940 to permit the establishment of certain facts by a declaration under penalty of perjury in lieu of an affidavit, and for other purposes\\nsal_volume: 74\\nsal_page_start: 820\\nBillCitation: H.R. 
3313\\ncongress_number: 86\\nchapter: NA\\nsession_number: 2\\npl_no: 86-721\\ndate_of_passage: 1960-09-08\\nsecondary_date: NA\\ndates_conflict: FALSE\\nSource: NA\\nURL: https://www.govinfo.gov/content/pkg/STATUTE-74/pdf/STATUTE-74-Pg820.pdf\\nalternate_sal_volume: 74\\nalternate_sal_page_start: 820\\nhas_alternate_sal_citation: TRUE', metadata={'source': 'us_laws_dataset.csv', 'row': 33138})]" + " Document(page_content='row_number: 44400\\naction: An Act\\nTitle: An act to amend title 18, United States Code, with respect to witness retaliation, witness tampering and jury tampering\\nsal_volume: 110\\nsal_page_start: 3017\\nBillCitation: H.R. 3120\\ncongress_number: 104\\nchapter: NA\\nsession_number: 2\\npl_no: 104-214\\ndate_of_passage: 1996-10-01\\nsecondary_date: NA\\ndates_conflict: FALSE\\nSource: NA\\nURL: https://www.govinfo.gov/content/pkg/STATUTE-110/pdf/STATUTE-110-Pg3017.pdf\\nalternate_sal_volume: 110\\nalternate_sal_page_start: 3017\\nhas_alternate_sal_citation: TRUE', metadata={'source': 'us_laws_dataset.csv', 'row': 44396})]" ] }, - "execution_count": 30, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test out the similarity search\n", - "query = \"What laws and acts relate to perjury?\"\n", + "query = \"What laws and amendments relate to perjury?\"\n", "response = db.similarity_search(query, k=10)\n", "# let's look at the first 3 retrieved docs\n", "response[:3]" @@ -350,7 +352,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -372,16 +374,9 @@ "## Instantiating the You.com Tool in Langchain" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "LangChain provides a wrapper around the You.com API and a You.com Tool. 
For more information, please visit: https://python.langchain.com/v0.1/docs/integrations/tools/you/" - ] - }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -394,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -405,14 +400,14 @@ " Document(page_content='U.S. v. Arden-Mayfair, Inc., Matanuska Maid, Inc.; and Meadowmoor Alaska Dairy, Inc. U.S. v. Argos USA LLC, f.k.a. Argos Ready Mix LLC', metadata={'url': 'https://www.justice.gov/atr/antitrust-case-filings-alpha', 'thumbnail_url': None, 'title': 'Antitrust Division | Antitrust Case Filings | United States Department of Justice', 'description': 'An official website of the United States government · Official websites use .gov A .gov website belongs to an official government organization in the United States'})]" ] }, - "execution_count": 33, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test out the You.com search tool\n", - "response = ydc_tool.invoke(\"Tell me about a recent high-profile case related to antitrust in the USA.\")\n", + "response = ydc_tool.invoke(\"Tell me about a recent high-profile case related to antitrust in the USA?\")\n", "# let's look at the first 3 results\n", "response[:3]" ] @@ -426,13 +421,15 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "from langchain_openai import ChatOpenAI\n", + "from langchain_anthropic import ChatAnthropic\n", "\n", - "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.5)" + "llm = ChatAnthropic(model='claude-3-5-sonnet-20240620')\n", + "#llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.5)" ] }, { @@ -444,7 +441,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -467,62 +464,132 @@ }, { "cell_type": "code", - "execution_count": 36, 
+ "execution_count": 13, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "'Several laws in the United States address economic espionage:\\n\\n1. **Economic Espionage Act of 1996**:\\n - **Public Law**: 104-294\\n - **Date of Passage**: October 11, 1996\\n - **Summary**: This act criminalizes the theft or misappropriation of trade secrets with the intent or knowledge that the offense will benefit a foreign government, foreign instrumentality, or foreign agent.\\n - **URL**: [Economic Espionage Act of 1996](https://www.govinfo.gov/content/pkg/STATUTE-110/pdf/STATUTE-110-Pg3488.pdf)\\n\\n2. **An act to amend title 18, United States Code, to provide for increased penalties for foreign and economic espionage, and for other purposes**:\\n - **Public Law**: 112-269\\n - **Date of Passage**: January 14, 2013\\n - **Summary**: This act amended Title 18 of the United States Code to increase penalties for foreign and economic espionage.\\n - **URL**: [Public Law 112-269](https://www.govinfo.gov/content/pkg/STATUTE-126/html/STATUTE-126-Pg2442.htm)\\n\\n3. **An act to clarify the scope of the Economic Espionage Act of 1996**:\\n - **Public Law**: 112-236\\n - **Date of Passage**: December 28, 2012\\n - **Summary**: This act clarifies certain aspects of the Economic Espionage Act of 1996 to ensure its effective enforcement.\\n - **URL**: [Public Law 112-236](https://www.govinfo.gov/content/pkg/STATUTE-126/html/STATUTE-126-Pg1627.htm)\\n\\n4. 
**An Act to amend section three, title one, of the Act entitled \"An Act to punish acts of interference with the foreign relations, the neutrality, and the foreign commerce of the United States, to punish espionage, and better to enforce the criminal laws of the United States, and for other purposes,\" approved June fifteenth, nineteen hundred and seventeen, and for other purposes**:\\n - **Public Law**: 65-150\\n - **Date of Passage**: May 16, 1918\\n - **Summary**: This act amended earlier legislation to further strengthen laws against espionage and interference with foreign relations and commerce.\\n - **URL**: Not available\\n\\nThese laws collectively address various aspects of economic espionage, including definitions, penalties, and clarifications to ensure effective enforcement.'" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Based on the information retrieved, I can provide you with an overview of the laws pertaining to perjury in the United States and a recent case related to a violation of these laws.\n", + "\n", + "Laws pertaining to perjury in the US:\n", + "\n", + "1. The main federal statutes criminalizing perjury are 18 U.S.C. §§ 1621 and 1623.\n", + "\n", + "2. 18 U.S.C. § 1621 (Perjury generally):\n", + " - This is the traditional, broadly applicable perjury statute.\n", + " - It applies to false statements made under oath before legislative, administrative, or judicial bodies.\n", + " - Key elements include:\n", + " a) Taking an oath before a competent tribunal, officer, or person.\n", + " b) Willfully stating or subscribing to any material matter which the person does not believe to be true.\n", + " - The penalty includes fines and imprisonment for up to five years.\n", + "\n", + "3. 18 U.S.C. 
§ 1623 (False declarations before grand jury or court):\n", + " - This statute specifically addresses false statements made in court or before a grand jury.\n", + " - It was added in 1971 as a result of the Organized Crime Control Act of 1970.\n", + "\n", + "4. The legal standard for perjury, as established in United States v. Dunnigan (1993), requires that a person:\n", + " - Testifies under oath or affirmation\n", + " - Gives false testimony concerning a material matter\n", + " - Does so with the willful intent to provide false testimony (not as a result of confusion, mistake, or faulty memory)\n", + "\n", + "5. Subornation of perjury (18 U.S.C. § 1622) is also a crime, which involves inducing another person to commit perjury.\n", + "\n", + "6. Some notable aspects of US perjury law include:\n", + " - A defense allowing individuals to recant a perjurious statement during the same proceeding.\n", + " - Application to written declarations made under penalty of perjury, such as tax returns.\n", + "\n", + "Regarding a recent case related to a violation of these laws:\n", + "\n", + "Unfortunately, the search results don't provide information about a specific recent case involving perjury. However, it's worth noting that perjury cases can be prosecuted at both federal and state levels. The FBI has primary investigative responsibility for perjury violations in cases involving departments and agencies of the United States, with some exceptions for specific agencies.\n", + "\n", + "To find a recent case, you would need to search through recent court records or news reports. Perjury cases can arise in various contexts, including criminal trials, civil litigation, grand jury proceedings, and even in official documents like tax returns.\n", + "\n", + "If you're interested in finding a specific recent case, I would recommend searching through legal databases or recent news articles about perjury convictions or charges. 
This would provide the most up-to-date information on perjury cases in the United States.\n" + ] } ], "source": [ - "agent_executor.invoke(input={\"messages\": \"What laws in the US address economic espionage?\"}, config={\"configurable\": {\"thread_id\": \"xyz_789\"}})[\"messages\"][-1].content" + "prompt_1 = \"What laws in the US pertain to perjury and is there a recent case in the US that relates to a violation of these laws?\"\n", + "\n", + "result = agent_executor.invoke(input={\"messages\": prompt_1}, config={\"configurable\": {\"thread_id\": \"xyz_789\"}})[\"messages\"][-1].content\n", + "print(result)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\"The most famous U.S. Supreme Court case related to economic espionage is **Totten v. United States**, 92 U.S. 105 (1876). This case is significant because it established the principle that certain secret contracts, particularly those involving espionage, could not be publicly reviewed by courts due to the necessity of maintaining confidentiality.\\n\\n### Key Details:\\n- **Case Citation**: Totten v. United States, 92 U.S. 105 (1876)\\n- **Summary**: The Supreme Court ruled that judicial review of espionage contracts is precluded when such review would inevitably lead to the disclosure of matters that the law regards as confidential. This principle was later upheld and expanded in subsequent cases, such as Tenet v. Doe (2005), which involved a contract claim against the CIA brought by Cold War-era spies.\\n\\n### Importance:\\n- **State Secrets Privilege**: The Totten case was an important precursor to the court's 1953 decision in United States v. 
Reynolds, wherein the court recognized the State Secrets Privilege.\\n- **Judicial Review Preclusion**: It clarified that judicial review is precluded in cases where success depends upon the existence of a secret espionage relationship with the government.\\n\\nFor more information, you can read about the case on [Wikipedia](https://en.wikipedia.org/wiki/Totten_v._United_States).\"" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Based on the search results, the most famous US Supreme Court perjury case is Bronston v. United States, 409 U.S. 352 (1973). This case is considered a seminal decision in US perjury law. Here are the key points about this case:\n", + "\n", + "1. Significance: Bronston v. United States is the controlling legal standard for perjury in federal jurisprudence.\n", + "\n", + "2. Decision: The Supreme Court strictly construed the federal perjury statute.\n", + "\n", + "3. Ruling: Chief Justice Warren Burger, writing for a unanimous Court, held that responses to questions made under oath that relay truthful information in themselves but are intended to mislead or evade the examiner could not be prosecuted as perjury.\n", + "\n", + "4. Implication: The criminal justice system must rely on more carefully worded follow-up questions to prevent evasive answers, rather than prosecuting for perjury.\n", + "\n", + "5. Legal Standard: The Court established that for a statement to be considered perjury, it must be false, concern a material matter, and be made with the willful intent to provide false testimony. It cannot be the result of confusion, mistake, or faulty memory.\n", + "\n", + "6. Impact: This decision has been cited in many subsequent cases and has become the controlling legal standard for perjury in federal courts.\n", + "\n", + "7. 
Controversy: The ruling has been criticized for creating a loophole in perjury statutes, essentially allowing a witness to mislead without legal consequences as long as their statements are literally true.\n", + "\n", + "8. Later Applications: The Bronston standard was notably invoked during Bill Clinton's impeachment proceedings in 1998 as a defense against perjury charges.\n", + "\n", + "While there have been other important perjury cases in the Supreme Court's history, Bronston v. United States stands out as the most famous and influential in shaping modern perjury law in the United States.\n" + ] } ], "source": [ - "agent_executor.invoke(input={\"messages\": \"What is the most famous US Supreme Court case related to economic espionage?\"}, config={\"configurable\": {\"thread_id\": \"xyz_789\"}})[\"messages\"][-1].content" + "prompt_2 = \"What is the most famous US Supreme Court perjury case?\"\n", + "result = agent_executor.invoke(input={\"messages\": prompt_2}, config={\"configurable\": {\"thread_id\": \"xyz_789\"}})[\"messages\"][-1].content\n", + "print(result)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "'Certainly, several federal laws in the United States pertain to economic espionage and could potentially be applied in cases involving espionage activities. Here are some key laws:\\n\\n1. **Economic Espionage Act of 1996 (EEA)**\\n - **Public Law No.**: 104-294\\n - **Description**: This act criminalizes the theft or misappropriation of trade secrets with the intent or knowledge that the offense will benefit a foreign government, foreign instrumentality, or foreign agent.\\n - **Key Sections**:\\n - **18 U.S.C. § 1831**: Addresses economic espionage involving foreign entities.\\n - **18 U.S.C. § 1832**: Addresses theft of trade secrets for commercial or economic advantage.\\n\\n2. 
**Espionage Act of 1917**\\n - **Public Law No.**: 65-24\\n - **Description**: Originally enacted to address espionage during wartime, this law has been amended to cover various forms of espionage, including economic espionage.\\n - **Key Sections**:\\n - **18 U.S.C. §§ 792-799**: Covers a wide range of espionage activities, including gathering or delivering defense information to aid a foreign government.\\n\\n3. **Foreign Agents Registration Act (FARA)**\\n - **Public Law No.**: 75-583\\n - **Description**: Requires individuals acting as agents of foreign principals in a political or quasi-political capacity to disclose their relationship with the foreign government and information about related activities and finances.\\n - **Key Sections**:\\n - **22 U.S.C. § 611 et seq.**: Addresses registration and disclosure requirements for foreign agents.\\n\\n4. **Computer Fraud and Abuse Act (CFAA)**\\n - **Public Law No.**: 99-474\\n - **Description**: This act addresses unauthorized access to computers and networks, which can be relevant in cases where economic espionage involves cyber activities.\\n - **Key Sections**:\\n - **18 U.S.C. § 1030**: Covers unauthorized access to computers to obtain information, commit fraud, or cause damage.\\n\\n5. **Trade Secrets Protection Act**\\n - **Public Law No.**: 114-153\\n - **Description**: This act provides a federal private right of action for the misappropriation of trade secrets.\\n - **Key Sections**:\\n - **18 U.S.C. § 1836**: Allows trade secret owners to bring a civil action for the misappropriation of their trade secrets.\\n\\n### Application to Totten v. United States\\nWhile the **Totten v. 
United States** case primarily dealt with the enforceability of espionage contracts and the preclusion of judicial review to maintain confidentiality, the following laws could have been relevant if the case involved modern aspects of economic espionage:\\n\\n- **Economic Espionage Act of 1996**: If the case involved the theft or misappropriation of trade secrets intended to benefit a foreign entity, the EEA would be directly applicable.\\n- **Espionage Act of 1917**: Given its broad coverage of espionage activities, this act could also apply if the activities involved national defense information or aiding a foreign government.\\n- **Computer Fraud and Abuse Act**: If any aspect of the espionage involved unauthorized access to computer systems or networks, the CFAA could be relevant.\\n\\nThese laws provide a robust framework for addressing various facets of economic espionage, from traditional espionage activities to modern cyber-related offenses.'" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Based on the information provided about US laws pertaining to perjury and considering the Bronston v. United States case, it's important to note that the laws have evolved over time. However, to answer your question about whether other laws could have been applied in this case, we need to consider the laws that were in effect at the time of the Bronston case in 1973. Let's analyze the situation:\n", + "\n", + "1. Main Perjury Statute: The primary law applied in Bronston v. United States was likely 18 U.S.C. § 1621, which is the general federal perjury statute. This law was already in place at the time of the case.\n", + "\n", + "2. False Declarations Statute: In 1970, just a few years before the Bronston case, Congress enacted 18 U.S.C. § 1623, which specifically addresses false declarations before a grand jury or court. 
This law could potentially have been applied, but it was relatively new at the time of the Bronston case.\n", + "\n", + "3. Unsworn Declarations: The act \"to permit the use of unsworn declarations under penalty of perjury as evidence in Federal proceedings\" (Public Law 94-550) was passed in 1976, after the Bronston case. Therefore, it couldn't have been applied in this specific case.\n", + "\n", + "4. Witness Tampering: The act \"to amend title 18, United States Code, with respect to witness retaliation, witness tampering and jury tampering\" (Public Law 104-214) was passed in 1996, long after the Bronston case. So this also couldn't have been applied.\n", + "\n", + "5. Earlier Perjury Laws: The 1823 act \"for the punishment of the crime of perjury\" might have influenced the legal framework at the time, but it's likely that its provisions had been superseded or incorporated into more recent statutes by 1973.\n", + "\n", + "Given this information, it appears that the main laws that could have been applied in the Bronston case were:\n", + "\n", + "1. 18 U.S.C. § 1621 (the general perjury statute)\n", + "2. 18 U.S.C. § 1623 (false declarations before a grand jury or court)\n", + "\n", + "The Court's decision in Bronston focused on interpreting these statutes, particularly § 1621. The Court's ruling essentially narrowed the application of the perjury statute by holding that literally true but misleading statements do not constitute perjury.\n", + "\n", + "It's worth noting that while other related laws existed or were later enacted, the Bronston decision set a precedent in interpreting perjury statutes that has influenced how these laws are applied in subsequent cases. 
The Court's interpretation emphasized the importance of precise questioning and placed the burden on the questioner to clarify evasive answers, rather than relying on perjury charges to address misleading but technically truthful statements.\n" + ] } ], "source": [ - "agent_executor.invoke(input={\"messages\": \"Based on your knowledge of federal laws in the US pertaining to economic espionage, were there any other laws that could have been applied in this case?\"}, config={\"configurable\": {\"thread_id\": \"xyz_789\"}})[\"messages\"][-1].content" + "prompt_3 = \"Based on your knowledge of all laws in the US pertaining to perjury, were there any other laws that could have been applied in this case?\"\n", + "result = agent_executor.invoke(input={\"messages\":prompt_3}, config={\"configurable\": {\"thread_id\": \"xyz_789\"}})[\"messages\"][-1].content\n", + "print(result)" ] }, { @@ -534,14 +601,15 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "import secrets\n", + "from typing import Union\n", "\n", "class CSV_QA_Bot:\n", - " def __init__(self, llm: ChatOpenAI, csv_files: list[str], num_web_results_to_fetch: int = 10):\n", + " def __init__(self, llm: Union[ChatOpenAI, ChatAnthropic], csv_files: list[str], num_web_results_to_fetch: int = 10):\n", " self._llm = llm\n", " \n", " docs = self._load_csv_files(csv_files)\n", @@ -555,8 +623,8 @@ " # convert this retriever into a Langchain tool\n", " self._faiss_retriever_tool = create_retriever_tool(\n", " self._faiss_retriever,\n", - " name = \"custom_dataset_retriever\",\n", - " description = \"Retrieve relevant context from custom dataset.\"\n", + " name = \"law_dataset_retriever\",\n", + " description = \"Retrieve relevant context from the US laws dataset.\"\n", " )\n", " \n", " # instantiate the YDC search tool in Langchain\n", @@ -571,7 +639,7 @@ " self._memory = MemorySaver()\n", " \n", " # create the agent executor\n", - " 
self._agent_executor = chat_agent_executor.create_tool_calling_executor(self._llm, self._tools, checkpointer=self._memory)\n", + " self._agent_executor = chat_agent_executor.create_tool_calling_executor(self._llm, tools, checkpointer=memory)\n", " \n", " # generate a thread ID for to keep track of conversation history\n", " self._thread_id = self._generate_thread_id()\n", @@ -596,8 +664,8 @@ " def invoke_bot(self, input_str: str) -> str:\n", " input = {\"messages\": input_str}\n", " config = {\"configurable\": {\"thread_id\": self._thread_id}}\n", - " output = self._agent_executor.invoke(input=input, config=config)\n", - " return output[\"messages\"][-1].content" + " output = self._agent_executor.invoke(input=input, config=config)[\"messages\"][-1].content\n", + " return output" ] }, { @@ -609,73 +677,136 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ - "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.5)\n", "conversational_agent = CSV_QA_Bot(llm, csv_files=[\"us_laws_dataset.csv\"])" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\"Insider trading in the USA is primarily addressed through several key pieces of legislation:\\n\\n1. **Securities Act of 1933**: This act requires full and fair disclosure of the character of securities sold in interstate and foreign commerce and through the mails, and aims to prevent fraud in the sale of securities. [Source](https://www.govinfo.gov/content/pkg/STATUTE-48/pdf/STATUTE-48-Pg74.pdf)\\n\\n2. **Securities Exchange Act of 1934**: This act provides for the regulation of securities exchanges and over-the-counter markets operating in interstate and foreign commerce and through the mails. It aims to prevent inequitable and unfair practices on such exchanges and markets. Sections 16(b) and 10(b) of this act directly and indirectly address insider trading. 
[Source](https://www.govinfo.gov/content/pkg/STATUTE-48/pdf/STATUTE-48-Pg881.pdf)\\n\\n3. **Insider Trading Sanctions Act of 1984**: This act enhances the penalties for insider trading violations, aiming to deter and punish those who engage in insider trading. [Source](https://www.govinfo.gov/content/pkg/STATUTE-98/pdf/STATUTE-98-Pg1264.pdf)\\n\\n4. **Insider Trading and Securities Fraud Enforcement Act of 1988**: This act further strengthens the penalties and enforcement mechanisms for insider trading and securities fraud. [Source](https://www.govinfo.gov/content/pkg/STATUTE-102/pdf/STATUTE-102-Pg4677.pdf)\\n\\nAdditionally, the **Securities and Exchange Commission (SEC)** plays a crucial role in enforcing these laws and prosecuting insider trading cases. The SEC has brought cases against corporate officers, directors, employees, and other individuals who trade securities based on material non-public information.\\n\\nFor more detailed information, you can refer to the respective acts and the SEC's resources on insider trading.\"" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Based on the search results, I can provide information about a recent perjury case in the United States. The case involves Craig German, a 60-year-old man from Kernersville, North Carolina. This case is particularly interesting because it relates to perjury committed during the sentencing phase of a previous case.\n", + "\n", + "Here are the key details of this recent perjury case:\n", + "\n", + "1. Background: Craig German was previously convicted for conspiring to steal trade secrets from aircraft manufacturing companies.\n", + "\n", + "2. New Charges: After his initial conviction, German faced additional charges for committing perjury in his prior case and for providing false statements to a government agency (the FBI).\n", + "\n", + "3. Trial and Conviction: A federal jury in the U.S. 
District Court for the Southern District of Georgia found German guilty of perjury and false statements to a government agency after a three-day trial.\n", + "\n", + "4. Specifics of the Perjury:\n", + " - During the sentencing portion of his prior case, German testified under oath and denied copying more than 15,000 proprietary engineering drawings and documents onto a removable storage device while employed at an aircraft manufacturing company.\n", + " - The jury found this testimony to be false, constituting perjury.\n", + "\n", + "5. Additional False Statements: German was also found guilty of providing a materially false statement to the FBI during a voluntary meeting, where he emphatically denied copying, taking, or otherwise transferring the proprietary documents.\n", + "\n", + "6. Prosecution: The case was investigated by the FBI and prosecuted by Assistant U.S. Attorneys Jennifer G. Solari and Steven H. Lee.\n", + "\n", + "7. Potential Consequences: The conviction for perjury and false statements means German faces additional prison time on top of his sentence from the previous case involving trade secret theft.\n", + "\n", + "This case illustrates how seriously the U.S. justice system takes perjury, especially when it occurs during official proceedings like sentencing hearings. It also demonstrates that lying to federal investigators (in this case, the FBI) can result in additional criminal charges.\n", + "\n", + "The case aligns with the laws we discussed earlier, particularly the ones related to using declarations under penalty of perjury in federal proceedings and addressing witness tampering. 
It shows that the legal system actively enforces these laws to maintain the integrity of court proceedings and investigations.\n" + ] } ], "source": [ - "conversational_agent.invoke_bot(\"What laws in the USA address insider trading?\")" + "prompt_1 = \"What laws in the US pertain to perjury and is there a recent case in the US that relates to a violation of these laws?\"\n", + "\n", + "print(conversational_agent.invoke_bot(prompt_1))" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "'One of the most famous U.S. Supreme Court cases involving insider trading is **Salman v. United States**. This case addressed the issue of what constitutes a benefit to the tipper in insider trading cases.\\n\\n### Case Overview:\\n- **Defendant**: Bassam Salman\\n- **Facts**: Salman was convicted of making nearly $1.2 million by trading on information that came from his brother-in-law, Maher Kara, who was a Citigroup investment banker. Maher Kara had provided tips about mergers involving Citi clients to his brother, who in turn tipped Salman.\\n- **Legal Question**: The case centered on whether prosecutors needed to prove that the tipper (Maher Kara) received a tangible benefit for providing the insider information.\\n- **Supreme Court Decision**: The court upheld the conviction, emphasizing that a gift of confidential information to a relative or friend could be enough to establish liability for insider trading, even if the tipper did not receive a tangible benefit in return.\\n\\n### Significance:\\nThe **Salman v. United States** decision clarified that insider trading laws could be enforced even when the tipper did not receive a tangible benefit, as long as the tipper provided the information as a gift to a relative or friend. 
This case has had a significant impact on how insider trading cases are prosecuted in the United States.\\n\\nFor more details, you can refer to the [Reuters article on the case](https://www.reuters.com/article/us-usa-court-insidertrading/u-s-supreme-court-agrees-to-hear-insider-trading-appeal-idUSKCN0UX1VG/).'" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Based on the search results, the most famous US Supreme Court perjury case is Bronston v. United States, 409 U.S. 352 (1973). This case is considered seminal in US jurisprudence regarding perjury. Here are the key points about this landmark case:\n", + "\n", + "1. Significance: Bronston v. United States is the controlling legal standard for perjury in federal jurisprudence and has been widely cited since its decision.\n", + "\n", + "2. Ruling: The Supreme Court, in a unanimous decision written by Chief Justice Warren Burger, strictly construed the federal perjury statute.\n", + "\n", + "3. Key principle: The Court held that responses to questions made under oath that relay truthful information in themselves but are intended to mislead or evade the examiner cannot be prosecuted as perjury.\n", + "\n", + "4. Implications: This decision essentially created a loophole in perjury statutes, allowing witnesses to potentially mislead without legal consequences as long as their statements are literally true.\n", + "\n", + "5. Remedy: The Court stated that the criminal justice system must rely on more carefully worded follow-up questions to address evasive answers, rather than prosecuting for perjury.\n", + "\n", + "6. Later impact: The Bronston decision was invoked during Bill Clinton's impeachment proceedings in 1998 as a defense against perjury charges.\n", + "\n", + "7. 
Criticism: The ruling has been long criticized for potentially allowing witnesses to lie without consequences, but subsequent Courts have refused to overrule or limit its scope.\n", + "\n", + "8. Ongoing relevance: Legal experts continue to analyze cases under the Bronston standard, as seen in the example of President Clinton's testimony during his impeachment proceedings.\n", + "\n", + "This case remains crucial in understanding how perjury is defined and prosecuted in the United States federal court system, making it the most famous Supreme Court perjury case to date.\n" + ] } ], "source": [ - "conversational_agent.invoke_bot(\"What is the most famous US Supreme Court case of insider trading?\")" + "prompt_2 = \"What is the most famous US Supreme Court perjury case?\"\n", + "\n", + "print(conversational_agent.invoke_bot(prompt_2))" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\"Yes, several federal laws in the USA related to insider trading could have been applied in the **Salman v. United States** case. These laws provide the framework for prosecuting insider trading and include:\\n\\n### 1. **Securities Exchange Act of 1934**\\n- **Section 10(b)**: This section prohibits any manipulative or deceptive device or contrivance in connection with the purchase or sale of any security.\\n- **Rule 10b-5**: Promulgated under Section 10(b), this rule makes it unlawful to employ any device, scheme, or artifice to defraud, make any untrue statement of a material fact, or engage in any act, practice, or course of business that operates as a fraud or deceit upon any person, in connection with the purchase or sale of any security.\\n\\n### 2. **Insider Trading Sanctions Act of 1984**\\n- This act enhances the penalties for insider trading violations, including civil penalties up to three times the profit gained or loss avoided from the illegal trades.\\n\\n### 3. 
**Insider Trading and Securities Fraud Enforcement Act of 1988**\\n- This act further strengthens the penalties and enforcement mechanisms for insider trading and securities fraud. It allows for the imposition of treble damages (three times the amount of profit gained or loss avoided) and increases the criminal penalties for insider trading.\\n\\n### 4. **Sarbanes-Oxley Act of 2002**\\n- **Section 807**: This section addresses securities fraud and imposes severe penalties for knowingly executing or attempting to execute a scheme to defraud any person in connection with any security.\\n- **Section 906**: This section imposes criminal penalties on corporate officers who knowingly certify false financial reports.\\n\\n### 5. **Dodd-Frank Wall Street Reform and Consumer Protection Act of 2010**\\n- This act includes provisions that enhance the SEC's ability to prosecute insider trading and other securities violations. It also provides for whistleblower incentives and protections, encouraging individuals to report insider trading violations.\\n\\n### 6. **Title 18, United States Code, Section 1348**\\n- This section criminalizes securities fraud and provides for penalties of up to 25 years in prison for those found guilty of defrauding any person in connection with any security.\\n\\n### Application to Salman v. United States:\\nIn the **Salman v. United States** case, the primary laws applied were the Securities Exchange Act of 1934 and Rule 10b-5, which directly address fraudulent activities in securities trading. 
However, the following additional laws could have been relevant:\\n\\n- The **Insider Trading Sanctions Act of 1984** and the **Insider Trading and Securities Fraud Enforcement Act of 1988** could have been used to impose enhanced civil penalties.\\n- The **Sarbanes-Oxley Act of 2002** and **Dodd-Frank Act** provisions could have provided additional grounds for prosecution and penalties.\\n- **Title 18, Section 1348** could have been invoked to pursue criminal charges for securities fraud.\\n\\nThese laws collectively ensure that insider trading is prosecuted comprehensively, with both civil and criminal penalties available to deter and punish such conduct.\"" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Based on the information retrieved about US laws pertaining to perjury and considering the Bronston v. United States case, it's important to note that the Bronston case was decided in 1973, which predates some of the laws mentioned in the retrieval. However, we can analyze whether any of these laws or other existing laws at the time could have been applied in this case:\n", + "\n", + "1. The 1823 Act for the punishment of the crime of perjury: This act was likely superseded by more modern statutes by the time of the Bronston case, but it shows the long-standing nature of perjury laws in the US.\n", + "\n", + "2. The federal perjury statute (18 U.S.C. § 1621): This was the primary law under consideration in the Bronston case. The Court's interpretation of this statute led to the ruling that literal truth, even if misleading, cannot be prosecuted as perjury.\n", + "\n", + "3. False Statements Statute (18 U.S.C. § 1001): While not specifically a perjury statute, this law prohibits making false statements to federal officials. 
It could potentially have been considered in cases where the statements were made to federal officials outside of court proceedings.\n", + "\n", + "4. Obstruction of Justice (18 U.S.C. § 1503): Although not a perjury statute per se, this law could potentially have been applied if the misleading statements were seen as an attempt to obstruct justice.\n", + "\n", + "5. The 1976 Act permitting unsworn declarations under penalty of perjury: This law came after the Bronston case, so it wouldn't have been applicable. However, it shows the evolution of perjury laws to include unsworn statements in certain circumstances.\n", + "\n", + "6. The 1996 Act amending Title 18 with respect to witness retaliation, witness tampering, and jury tampering: This also came after Bronston, but it demonstrates how laws related to court proceedings and testimony have expanded over time.\n", + "\n", + "In the specific context of the Bronston case, the Court's interpretation focused narrowly on the federal perjury statute. The ruling essentially stated that if a witness gives an answer that is literally true but nonresponsive to the question, it cannot be considered perjury even if the answer is intentionally misleading.\n", + "\n", + "Given this interpretation, it's unlikely that other laws could have been successfully applied in this specific case. The Court's decision set a high bar for perjury convictions, emphasizing the need for explicit falsehoods rather than merely misleading statements.\n", + "\n", + "However, in subsequent cases, prosecutors might consider using a combination of charges, including obstruction of justice or false statements, to address situations where witnesses provide misleading but technically truthful answers. 
The evolution of laws after Bronston also suggests that legislators have attempted to close some of the loopholes created by this decision, particularly in areas related to witness testimony and court proceedings.\n", + "\n", + "It's worth noting that the Bronston decision remains controversial, with some legal scholars arguing that it creates too high a bar for perjury convictions and potentially encourages evasive testimony. Nonetheless, it remains the controlling precedent in federal perjury cases.\n" + ] } ], "source": [ - "conversational_agent.invoke_bot(\"Based on your knowledge of federal laws in the USA related to insider trading, were there any other laws that could have been applied in this case?\")" + "prompt_3 = \"Based on your knowledge of all laws in the US pertaining to perjury, were there any other laws that could have been applied in this case?\"\n", + "\n", + "print(conversational_agent.invoke_bot(prompt_3))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/you_dspy_vc_chat.ipynb b/you_dspy_vc_chat.ipynb new file mode 100644 index 0000000..d917da4 --- /dev/null +++ b/you_dspy_vc_chat.ipynb @@ -0,0 +1,688 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# you.com <> dspy: ChatVC\n", + "\n", + "A chatbot that I can ask questions about early-stage investing and any news relevant to potential investment opportunities." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "! pip install dspy==0.1.5\n", + "! 
pip install dotenv==0.0.5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load API keys" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# assumes a .env file exists with api keys YDC_API_KEY and OPENAI_API_KEY\n", + "\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Building blocks\n", + "\n", + "This section introduces the building blocks used later on to build a RAG pipeline and a ReAct agent." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Language Model (lm)\n", + "\n", + "A Language Model (lm) in DSPy is the client object through which a DSPy program calls a large language model." + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "import dspy\n", + "\n", + "turbo = dspy.OpenAI(model='gpt-4o')\n", + "\n", + "# thread-safe built-in\n", + "dspy.settings.configure(lm=turbo)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Signature\n", + "\n", + "Every call to the LM in a DSPy program needs to have a `Signature`.\n", + "\n", + "A signature consists of three simple elements:\n", + "\n", + "* A minimal description of the sub-task the LM is supposed to solve.\n", + "* A description of one or more input fields (e.g., input question) that we will give to the LM.\n", + "* A description of one or more output fields (e.g., the question's answer) that we will expect from the LM." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "class BasicQA(dspy.Signature):\n", + " \"\"\"Answer questions with wise suggestions\"\"\"\n", + "\n", + " question = dspy.InputField()\n", + " answer = dspy.OutputField(desc=\"often between 40-50 words\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Try it out!" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Question: If you are advising a founder on how they should choose an invester in their company, what qualities should they look for?\n", + "Predicted Answer: Question: If you are advising a founder on how they should choose an investor in their company, what qualities should they look for?\n", + "Answer: Look for investors who align with your vision, bring industry expertise, and offer valuable networks. Ensure they have a track record of supporting startups and can provide strategic guidance. Compatibility in values and communication style is also crucial for a successful partnership.\n" + ] + } + ], + "source": [ + "question = \"If you are advising a founder on how they should choose an invester in their company, what qualities should they look for?\"\n", + "\n", + "# Define the predictor.\n", + "generate_answer = dspy.Predict(BasicQA)\n", + "\n", + "# Call the predictor on a particular input.\n", + "pred = generate_answer(question=question)\n", + "\n", + "# Print the input and the prediction.\n", + "print(f\"Question: {question}\")\n", + "print(f\"Predicted Answer: {pred.answer}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Retriever Model (rm)\n", + "\n", + "A Retriever Model refers to a component that is responsible for retrieving relevant information from a retrieval corpus based on user queries. 
In this case, we'll be using You.com's news API as a retriever." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "from typing import Any, Literal, Optional, Union\n", + "\n", + "import requests\n", + "\n", + "import dspy\n", + "from dsp.utils import dotdict\n", + "\n", + "\n", + "class YouRM(dspy.Retrieve):\n", + " \"\"\"Retriever for You.com's Search and News API.\n", + "\n", + " [API reference](https://documentation.you.com/api-reference/)\n", + "\n", + " Args:\n", + " ydc_api_key: you.com API key, if `YDC_API_KEY` is not set in the environment\n", + " k: If ``endpoint=\"search\"``, the max snippets to return per search hit.\n", + " If ``endpoint=\"news\"``, the max articles to return.\n", + " endpoint: you.com endpoints\n", + " num_web_results: The max number of web results to return, must be under 20\n", + " safesearch: Safesearch settings, one of \"off\", \"moderate\", \"strict\", defaults to moderate\n", + " country: Country code, ex: 'US' for United States, see API reference for more info\n", + " search_lang: (News API) Language codes, ex: 'en' for English, see API reference for more info\n", + " ui_lang: (News API) User interface language for the response, ex: 'en' for English.\n", + " See API reference for more info\n", + " spellcheck: (News API) Whether to spell check query or not, defaults to True\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " ydc_api_key: Optional[str] = None,\n", + " k: int = 3,\n", + " endpoint: Literal[\"search\", \"news\"] = \"search\",\n", + " num_web_results: Optional[int] = None,\n", + " safesearch: Optional[Literal[\"off\", \"moderate\", \"strict\"]] = None,\n", + " country: Optional[str] = None,\n", + " search_lang: Optional[str] = None,\n", + " ui_lang: Optional[str] = None,\n", + " spellcheck: Optional[bool] = None,\n", + " ):\n", + " super().__init__(k=k)\n", + "\n", + " # Data validation\n", + " if not 
ydc_api_key and not os.environ.get(\"YDC_API_KEY\"):\n", + " raise RuntimeError('You must supply `ydc_api_key` or set environment variable \"YDC_API_KEY\"')\n", + "\n", + " if endpoint not in (\"search\", \"news\"):\n", + " raise ValueError('`endpoint` must be either \"search\" or \"news\"')\n", + "\n", + " # Raise warning if News API-specific fields are set but endpoint is not \"news\"\n", + " if endpoint != \"news\":\n", + " news_api_fields = (search_lang, ui_lang, spellcheck)\n", + " for field in news_api_fields:\n", + " if field:\n", + " warnings.warn(\n", + " (\n", + " f\"News API-specific field '{field}' is set but `{endpoint=}`. \"\n", + " \"This will have no effect.\"\n", + " ),\n", + " UserWarning,\n", + " )\n", + "\n", + " self.ydc_api_key = ydc_api_key or os.environ.get(\"YDC_API_KEY\")\n", + " self.endpoint = endpoint\n", + " self.num_web_results = num_web_results\n", + " self.safesearch = safesearch\n", + " self.country = country\n", + " self.search_lang = search_lang\n", + " self.ui_lang = ui_lang\n", + " self.spellcheck = spellcheck\n", + "\n", + " def _generate_params(self, query: str) -> dict[str, Any]:\n", + " params = {\"safesearch\": self.safesearch, \"country\": self.country}\n", + "\n", + " if self.endpoint == \"search\":\n", + " params.update(\n", + " query=query,\n", + " num_web_results=self.num_web_results,\n", + " )\n", + " elif self.endpoint == \"news\":\n", + " params.update(\n", + " q=query,\n", + " count=self.num_web_results,\n", + " search_lang=self.search_lang,\n", + " ui_lang=self.ui_lang,\n", + " spellcheck=self.spellcheck,\n", + " )\n", + "\n", + " # Remove `None` values\n", + " params = {k: v for k, v in params.items() if v is not None}\n", + " return params\n", + "\n", + " def forward(self, query_or_queries: Union[str, list[str]], k: Optional[int] = None) -> dspy.Prediction:\n", + " k = k if k is not None else self.k\n", + "\n", + " queries = [query_or_queries] if isinstance(query_or_queries, str) else query_or_queries\n", + " 
docs: list[str]\n", + " for query in queries:\n", + " headers = {\"X-API-Key\": self.ydc_api_key}\n", + " params = self._generate_params(query)\n", + " response = requests.get(\n", + " f\"https://api.ydc-index.io/{self.endpoint}\",\n", + " params=params,\n", + " headers=headers,\n", + " )\n", + " response.raise_for_status()\n", + " results = response.json()\n", + "\n", + " if self.endpoint == \"search\":\n", + " docs = [snippet for hits in results[\"hits\"][:k] for snippet in hits[\"snippets\"]]\n", + " elif self.endpoint == \"news\":\n", + " docs = [article[\"description\"] for article in results[\"news\"][\"results\"][:k]]\n", + " return [dotdict({\"long_text\": document}) for document in docs]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'long_text': \"It's not quite summer yet, though it might as well be ...\"},\n", + " {'long_text': 'PRINCETON, NJ - The Princeton wrestling team announced Thursday that the program will be welcoming seven incoming freshman as a part of the Class of 2028.'},\n", + " {'long_text': 'The new true crime series — from the creators of the award-winning podcast \"Father Wants Us Dead\" — investigates the 1989 cold-case killing of a Princeton grande dame.'}]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# from dspy.retrieve.you_rm import YouRM\n", + "\n", + "news_rm = YouRM(endpoint=\"news\")\n", + "res = news_rm(\"Princeton\")\n", + "res" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve\n", + "\n", + "A module `dspy.Retrieve(k)` will search for the top-k passages that match a given query. \n", + " \n", + "By default, this will use the retriever we configure in `dspy.settings.configure()`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "dspy.settings.configure(lm=turbo, rm=news_rm)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Top 3 passages for question: What is latest news about Princeton University? \n", + " ------------------------------ \n", + "\n", + "1] Reunions events begin Thursday, May 23, and run through Sunday, May 26. \n", + "\n", + "2] More than a dozen students at Princeton University said they were ending their hunger strike amid continued anti-Israel demonstrations at the university. \n", + "\n", + "3] Over a dozen students at Princeton University have been on hunger strike for the past week as part of a Gaza solidarity encampment on campus protesting Israel’s war on Gaza and calling on the university to disclose and divest from companies with ties to Israel, among other demands. \n", + "\n" + ] + } + ], + "source": [ + "question = \"What is latest news about Princeton University?\"\n", + "\n", + "retrieve = dspy.Retrieve(k=3)\n", + "topK_passages = retrieve(question).passages\n", + "\n", + "print(f\"Top {retrieve.k} passages for question: {question} \\n\", '-' * 30, '\\n')\n", + "\n", + "for idx, passage in enumerate(topK_passages):\n", + " print(f'{idx+1}]', passage, '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ChatVC: RAG with News API\n", + "\n", + "Given a question, we'll search for the latest news through you.com news API and then feed them as context for answer generation." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Signature\n", + "\n", + "Let's start by defining this signature: `context, question --> answer.`" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "class GenerateAnswer(dspy.Signature):\n", + " \"\"\"Answer questions with the news in the context\"\"\"\n", + " context = dspy.InputField(desc=\"may contain relevant news\")\n", + " question = dspy.InputField()\n", + " answer = dspy.OutputField(desc=\"highlights key points in context - often between 200-500 words\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Module\n", + "\n", + "* The `__init__` method will simply declare the sub-modules it needs: `dspy.Retrieve` and `dspy.ChainOfThought`. The latter is defined to implement our GenerateAnswer signature.\n", + "* The `forward` method will describe the control flow of answering the question using the modules we have." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "class RAG(dspy.Module):\n", + " def __init__(self, num_passages=3):\n", + " super().__init__()\n", + "\n", + " self.retrieve = dspy.Retrieve(k=num_passages)\n", + " self.generate_answer = dspy.ChainOfThought(GenerateAnswer)\n", + " \n", + " def forward(self, question):\n", + " context = self.retrieve(question).passages\n", + " prediction = self.generate_answer(context=context, question=question)\n", + " return dspy.Prediction(context=context, answer=prediction.answer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Try it out!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Question: Princeton\n", + "Predicted Answer: The Princeton wrestling team announced that they will be welcoming seven incoming freshmen as part of the Class of 2028. Additionally, a new true crime series investigates the 1989 cold-case killing of a Princeton grande dame.\n" + ] + } + ], + "source": [ + "my_question = \"Princeton\"\n", + "\n", + "# Get the prediction. This contains `pred.context` and `pred.answer`.\n", + "uncompiled_rag = RAG() # uncompiled (i.e., zero-shot) program\n", + "pred = uncompiled_rag(my_question)\n", + "\n", + "# Print the question and the predicted answer.\n", + "print(f\"Question: {my_question}\")\n", + "print(f\"Predicted Answer: {pred.answer}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Inspect the LM's chain of thought to iterate on and modify the signature." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "Answer questions with the news in the context\n", + "\n", + "---\n", + "\n", + "Follow the following format.\n", + "\n", + "Context: may contain relevant news\n", + "\n", + "Question: ${question}\n", + "\n", + "Reasoning: Let's think step by step in order to ${produce the answer}. 
We ...\n", + "\n", + "Answer: highlights key points in context - often between 50-100 words\n", + "\n", + "---\n", + "\n", + "Context:\n", + "[1] «It's not quite summer yet, though it might as well be ...»\n", + "[2] «PRINCETON, NJ - The Princeton wrestling team announced Thursday that the program will be welcoming seven incoming freshman as a part of the Class of 2028.»\n", + "[3] «The new true crime series — from the creators of the award-winning podcast \"Father Wants Us Dead\" — investigates the 1989 cold-case killing of a Princeton grande dame.»\n", + "\n", + "Question: Princeton\n", + "\n", + "Reasoning: Let's think step by step in order to\u001b[32m Reasoning: Let's think step by step in order to identify the relevant information about Princeton from the context. We have three pieces of information: the weather, a wrestling team announcement, and a true crime series related to Princeton.\n", + "\n", + "Answer: The Princeton wrestling team announced that they will be welcoming seven incoming freshmen as part of the Class of 2028. 
Additionally, a new true crime series investigates the 1989 cold-case killing of a Princeton grande dame.\u001b[0m\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "turbo.inspect_history(n=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ChatVC: ReAct Agent with Tools\n", + "\n", + "* ReAct: an LLM agent designed to tackle complex tasks in an interactive fashion\n", + "* In this example, we add multiple retrievers (news and search API) as tools in ReAct to shape the agent's interaction and response mechanisms" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Search\n", + "News\n" + ] + } + ], + "source": [ + "# Both YouRM instances are of type dspy.Retrieve, which has name=\"Search\", so both tools would be named \"Search\".\n", + "# To be able to use both tools separately, their names need to be different,\n", + "# so set the name of the news retriever manually.\n", + "\n", + "search_rm = YouRM(endpoint=\"search\")\n", + "print(search_rm.name)\n", + "news_rm = YouRM(endpoint=\"news\")\n", + "news_rm.name = \"News\"\n", + "print(news_rm.name)\n", + "\n", + "gen = dspy.ReAct('question -> answer', tools=[search_rm, news_rm])" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Question: What news do you have on Princeton that might be interesting to a VC firm?\n", + "Final Predicted Answer (after ReAct process): Princeton recently hosted an event where 600 leaders from academia, business, and government gathered to explore the rapidly evolving possibilities and challenges of artificial intelligence. 
This event could be of interest to a VC firm looking for investment opportunities or collaborations in the AI sector.\n" + ] + } + ], + "source": [ + "# Call the ReAct module on a particular input\n", + "question = 'What news do you have on Princeton that might be interesting to a VC firm?'\n", + "result = gen(question=question)\n", + "\n", + "print(f\"Question: {question}\")\n", + "print(f\"Final Predicted Answer (after ReAct process): {result.answer}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "You will be given `question` and you will respond with `answer`.\n", + "\n", + "To do this, you will interleave Thought, Action, and Observation steps.\n", + "\n", + "Thought can reason about the current situation, and Action can be the following types:\n", + "\n", + "(1) Search[query], which takes a search query and returns one or more potentially relevant passages from a corpus\n", + "(2) News[query], which takes a search query and returns one or more potentially relevant passages from a corpus\n", + "(3) Finish[answer], which returns the final `answer` and finishes the task\n", + "\n", + "---\n", + "\n", + "Follow the following format.\n", + "\n", + "Question: ${question}\n", + "Thought 1: next steps to take based on last observation\n", + "Action 1: always either Search[query] or News[query] or, when done, Finish[answer]\n", + "\n", + "---\n", + "\n", + "Question: What news do you have on Princeton that might be interesting to a VC firm?\n", + "Thought 1:\u001b[32m Thought 1: To provide relevant news about Princeton that might be interesting to a VC firm, I should look for recent developments in the areas of technology, startups, investments, and academic research coming out of Princeton University or the Princeton area.\n", + "\n", + "Action 1: News[Princeton University technology startups investments]\u001b[0m\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "You will be given `question` and you will respond with `answer`.\n", + "\n", + "To do this, you will interleave Thought, Action, and Observation steps.\n", + "\n", + "Thought can reason about the current situation, and Action can be the following types:\n", + "\n", + "(1) Search[query], which takes a search query and returns one or more potentially relevant passages from a corpus\n", + "(2) News[query], which takes a search query and returns one or more potentially relevant passages from a corpus\n", + "(3) Finish[answer], which returns the final `answer` and finishes the task\n", + "\n", + "---\n", + "\n", + "Follow the following format.\n", + "\n", + "Question: ${question}\n", + "\n", + "Thought 1: next steps to take based on last observation\n", + "\n", + "Action 1: always either Search[query] or News[query] or, when done, Finish[answer]\n", + "\n", + "Observation 1: observations based on action\n", + "\n", + "Thought 2: next steps to take based on last observation\n", + "\n", + "Action 2: always either Search[query] or News[query] or, when done, Finish[answer]\n", + "\n", + "---\n", + "\n", + "Question: What news do you have on Princeton that might be interesting to a VC firm?\n", + "\n", + "Thought 1: Thought 1: To provide relevant news about Princeton that might be interesting to a VC firm, I should look for recent developments in the areas of technology, startups, investments, and academic research coming out of Princeton University or the Princeton area.\n", + "\n", + "Action 1: News[Princeton University technology startups investments]\n", + "\n", + "Observation 1:\n", + "[1] «{'long_text': 'Read more about tech startup news that breaks down funding, growth, and long-term trajectory of companies across every stage and industry.'}»\n", + "[2] «{'long_text': 'Princeton welcomed 600 leaders from academia, business and government to campus to explore the rapidly evolving possibilities and challenges of 
artificial intelligence and to begin charting a course for New Jersey’s role in the future of AI.'}»\n", + "[3] «{'long_text': 'The University’s endowment investments declined for a second consecutive year, according to the Princeton University Investment Co.'}»\n", + "\n", + "Thought 2:\u001b[32m Thought 2: The second observation mentions a significant event where Princeton welcomed leaders from various sectors to discuss the future of AI, which could be highly relevant to a VC firm interested in technology and innovation. This event could indicate potential investment opportunities or collaborations in the AI sector.\n", + "\n", + "Action 2: Finish[Princeton recently hosted an event where 600 leaders from academia, business, and government gathered to explore the rapidly evolving possibilities and challenges of artificial intelligence. This event could be of interest to a VC firm looking for investment opportunities or collaborations in the AI sector.]\u001b[0m\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "turbo.inspect_history(n=3)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/you_news_and_llama_index.ipynb b/you_news_and_llama_index.ipynb index 6316908..21fc161 100644 --- a/you_news_and_llama_index.ipynb +++ b/you_news_and_llama_index.ipynb @@ -100,7 +100,7 @@ "# TODO update the pinned version of llama-index-retrievers-you after PR is merged: https://github.com/run-llama/llama_index/pull/13934\n", "from llama_index.retrievers.you import YouRetriever\n", "\n", - "retriever = YouRetriever(endpoint_type=\"news\")\n", + "retriever = YouRetriever()\n", 
"retrieved_results = retriever.retrieve(\"national parks in the US\")\n", "\n", "print(retrieved_results[0].get_content())"