From 9622f542cb55589e7fa04df722d1438328e83ee6 Mon Sep 17 00:00:00 2001 From: Pranjal Doshi Date: Wed, 30 Oct 2024 12:14:33 +0530 Subject: [PATCH 1/5] Port milvus changes to release branch --- RAG/examples/local_deploy/docker-compose-vectordb.yaml | 9 ++++++++- RAG/src/chain_server/utils.py | 8 ++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/RAG/examples/local_deploy/docker-compose-vectordb.yaml b/RAG/examples/local_deploy/docker-compose-vectordb.yaml index cd76bc981..4f7a9ecf7 100644 --- a/RAG/examples/local_deploy/docker-compose-vectordb.yaml +++ b/RAG/examples/local_deploy/docker-compose-vectordb.yaml @@ -54,7 +54,7 @@ services: milvus: container_name: milvus-standalone - image: milvusdb/milvus:v2.4.5 + image: milvusdb/milvus:v2.4.15-gpu command: ["milvus", "run", "standalone"] environment: ETCD_ENDPOINTS: etcd:2379 @@ -74,6 +74,13 @@ services: depends_on: - "etcd" - "minio" + deploy: + resources: + reservations: + devices: + - driver: nvidia + capabilities: ["gpu"] + device_ids: ['${VECTORSTORE_GPU_DEVICE_ID:-0}'] profiles: ["nemo-retriever", "milvus", ""] elasticsearch: diff --git a/RAG/src/chain_server/utils.py b/RAG/src/chain_server/utils.py index caa8169bb..8986f6075 100644 --- a/RAG/src/chain_server/utils.py +++ b/RAG/src/chain_server/utils.py @@ -314,13 +314,17 @@ def create_vectorstore_langchain(document_embedder: "Embeddings", collection_nam ) elif config.vector_store.name == "milvus": logger.info(f"Using milvus collection: {collection_name}") - # vectorstore url can be updated using environment variable APP_VECTORSTORE_URL, it should be in http://ip:port format + if not collection_name: + collection_name = os.getenv('COLLECTION_NAME', "vector_db") + logger.info(f"Using milvus collection: {collection_name}") url = urlparse(config.vector_store.url) vectorstore = Milvus( document_embedder, connection_args={"host": url.hostname, "port": url.port}, collection_name=collection_name, - auto_id=True, + index_params={"index_type": config.vector_store.index_type, "metric_type": "L2", "nlist": config.vector_store.nlist}, + search_params={"nprobe": config.vector_store.nprobe}, + auto_id = True ) else: raise ValueError(f"{config.vector_store.name} vector database is not supported") From 1465fdcb8226f42fcdd253e873b37b70013dade4 Mon Sep 17 00:00:00 2001 From: Pranjal Doshi Date: Tue, 5 Nov 2024 09:20:13 +0530 Subject: [PATCH 2/5] Use GPU flag for document ingestion as default --- RAG/src/chain_server/configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RAG/src/chain_server/configuration.py b/RAG/src/chain_server/configuration.py index 2fead6c5d..ba0dde217 100644 --- a/RAG/src/chain_server/configuration.py +++ b/RAG/src/chain_server/configuration.py @@ -40,7 +40,7 @@ class VectorStoreConfig(ConfigWizard): "nprobe", default=16, help_txt="Number of units to query", # IVF Flat milvus ) index_type: str = configfield( - "index_type", default="IVF_FLAT", help_txt="Index of the vector db", # IVF Flat for milvus + "index_type", default="GPU_IVF_FLAT", help_txt="Index of the vector db", # IVF Flat for milvus ) From 091cfc7f627d629e7ccd765d1be785c70c8cac8d Mon Sep 17 00:00:00 2001 From: Pranjal Doshi Date: Wed, 30 Oct 2024 10:05:56 +0530 Subject: [PATCH 3/5] Remove text field from milvus query for list/delete API --- RAG/src/chain_server/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/RAG/src/chain_server/utils.py b/RAG/src/chain_server/utils.py index 8986f6075..0a7ee247d 100644 --- a/RAG/src/chain_server/utils.py +++ b/RAG/src/chain_server/utils.py @@ -521,7 +521,7 @@ def get_docs_vectorstore_langchain(vectorstore: VectorStore) -> List[str]: elif settings.vector_store.name == "milvus": # Getting all the ID's > 0 if vectorstore.col: - milvus_data = vectorstore.col.query(expr="pk >= 0", output_fields=["pk", "source", "text"]) + milvus_data = vectorstore.col.query(expr="pk >= 0", output_fields=["pk", "source"]) filenames = set([extract_filename(metadata) for metadata in milvus_data]) return filenames except Exception as e: @@ -586,7 +586,7 @@ def del_docs_vectorstore_langchain(vectorstore: VectorStore, filenames: List[str logger.info(f"Deleted documents with filenames {filename}") elif settings.vector_store.name == "milvus": # Getting all the ID's > 0 - milvus_data = vectorstore.col.query(expr="pk >= 0", output_fields=["pk", "source", "text"]) + milvus_data = vectorstore.col.query(expr="pk >= 0", output_fields=["pk", "source"]) for filename in filenames: # get ids with filename in metadata ids_list = [metadata["pk"] for metadata in milvus_data if extract_filename(metadata) == filename] From 8661066e802158783a9080b1759eb52ef2b76a0b Mon Sep 17 00:00:00 2001 From: Shubhadeep Das Date: Fri, 8 Nov 2024 19:31:11 +0530 Subject: [PATCH 4/5] Revert "Remove text field from milvus query for list/delete API" This reverts commit 091cfc7f627d629e7ccd765d1be785c70c8cac8d. --- RAG/src/chain_server/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/RAG/src/chain_server/utils.py b/RAG/src/chain_server/utils.py index 0a7ee247d..8986f6075 100644 --- a/RAG/src/chain_server/utils.py +++ b/RAG/src/chain_server/utils.py @@ -521,7 +521,7 @@ def get_docs_vectorstore_langchain(vectorstore: VectorStore) -> List[str]: elif settings.vector_store.name == "milvus": # Getting all the ID's > 0 if vectorstore.col: - milvus_data = vectorstore.col.query(expr="pk >= 0", output_fields=["pk", "source"]) + milvus_data = vectorstore.col.query(expr="pk >= 0", output_fields=["pk", "source", "text"]) filenames = set([extract_filename(metadata) for metadata in milvus_data]) return filenames except Exception as e: @@ -586,7 +586,7 @@ def del_docs_vectorstore_langchain(vectorstore: VectorStore, filenames: List[str logger.info(f"Deleted documents with filenames {filename}") elif settings.vector_store.name == "milvus": # Getting all the ID's > 0 - milvus_data = vectorstore.col.query(expr="pk >= 0", output_fields=["pk", "source"]) + milvus_data = vectorstore.col.query(expr="pk >= 0", output_fields=["pk", "source", "text"]) for filename in filenames: # get ids with filename in metadata ids_list = [metadata["pk"] for metadata in milvus_data if extract_filename(metadata) == filename] From a8b82fa7677064b45d770609415310f3254b98a9 Mon Sep 17 00:00:00 2001 From: Shubhadeep Das Date: Wed, 13 Nov 2024 16:44:47 +0530 Subject: [PATCH 5/5] Disable conv history store and support to consider conv history from server --- .../advanced_rag/multi_turn_rag/chains.py | 91 +++++++++++-------- .../advanced_rag/multi_turn_rag/prompt.yaml | 2 - RAG/src/rag_playground/requirements.txt | 2 +- 3 files changed, 55 insertions(+), 40 deletions(-) diff --git a/RAG/examples/advanced_rag/multi_turn_rag/chains.py b/RAG/examples/advanced_rag/multi_turn_rag/chains.py index dd4664969..87ea0f918 100644 --- a/RAG/examples/advanced_rag/multi_turn_rag/chains.py +++ b/RAG/examples/advanced_rag/multi_turn_rag/chains.py @@ -127,15 +127,31 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene # ] # ) - # This is a workaround Prompt Template + logger.info(f"Chat history: {chat_history}") + + conversation_history = [(msg.role, msg.content) for msg in chat_history] + system_message = [("system", prompts.get("multi_turn_rag_template", ""))] + user_message = [("user", "{input}")] + + # Checking if conversation_history is not None and not empty chat_prompt = ChatPromptTemplate.from_messages( - [("user", prompts.get("multi_turn_rag_template") + "User Query: {input}"),] + system_message + conversation_history + user_message + ) if conversation_history else ChatPromptTemplate.from_messages( + system_message + user_message ) + logger.info(f"Formulated chat prompt template: {chat_prompt}") + + #chat_prompt = ChatPromptTemplate.from_messages(system_message + user_message) + # This is a workaround Prompt Template + # chat_prompt = ChatPromptTemplate.from_messages( + # [("user", prompts.get("multi_turn_rag_template") + "User Query: {input}"),] + # ) + llm = get_llm(**kwargs) stream_chain = chat_prompt | llm | StrOutputParser() - convstore = create_vectorstore_langchain(document_embedder, collection_name="conv_store") + # convstore = create_vectorstore_langchain(document_embedder, collection_name="conv_store") resp_str = "" # TODO Integrate chat_history @@ -161,15 +177,16 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene } ) - history_chain = RunnableAssign( - { - "history": itemgetter("input") - | convstore.as_retriever( - search_type="similarity_score_threshold", - search_kwargs={"score_threshold": settings.retriever.score_threshold, "k": top_k}, - ) - } - ) + # history_chain = RunnableAssign( + # { + # "history": itemgetter("input") + # | convstore.as_retriever( + # search_type="similarity_score_threshold", + # search_kwargs={"score_threshold": settings.retriever.score_threshold, "k": top_k}, + # ) + # } + # ) + if ranker: logger.info( f"Narrowing the collection from {top_k} results and further narrowing it to {settings.retriever.top_k} with the reranker." @@ -181,17 +198,17 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene ) } ) - history_reranker = RunnableAssign( - { - "history": lambda input: ranker.compress_documents( - query=input['input'], documents=input['history'] - ) - } - ) - - retrieval_chain = context_chain | context_reranker | history_chain | history_reranker + # history_reranker = RunnableAssign( + # { + # "history": lambda input: ranker.compress_documents( + # query=input['input'], documents=input['history'] + # ) + # } + # ) + + retrieval_chain = context_chain | context_reranker #| history_chain | history_reranker else: - retrieval_chain = context_chain | history_chain + retrieval_chain = context_chain #| history_chain # Handling Retrieval failure docs = retrieval_chain.invoke({"input": query}, config={"callbacks": [self.cb_handler]}) if not docs: @@ -210,7 +227,7 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene yield chunk resp_str += chunk - self.save_memory_and_get_output({"input": query, "output": resp_str}, convstore) + #self.save_memory_and_get_output({"input": query, "output": resp_str}, convstore) return chain.stream(query, config={"callbacks": [self.cb_handler]}) @@ -223,9 +240,9 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene {"context": itemgetter("input") | ds.as_retriever(search_kwargs={"k": top_k})} ) - history_chain = RunnableAssign( - {"history": itemgetter("input") | convstore.as_retriever(search_kwargs={"k": top_k})} - ) + # history_chain = RunnableAssign( + # {"history": itemgetter("input") | convstore.as_retriever(search_kwargs={"k": top_k})} + # ) if ranker: logger.info( f"Narrowing the collection from {top_k} results and further narrowing it to {settings.retriever.top_k} with the reranker." @@ -237,17 +254,17 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene ) } ) - history_reranker = RunnableAssign( - { - "history": lambda input: ranker.compress_documents( - query=input['input'], documents=input['history'] - ) - } - ) - - retrieval_chain = context_chain | context_reranker | history_chain | history_reranker + # history_reranker = RunnableAssign( + # { + # "history": lambda input: ranker.compress_documents( + # query=input['input'], documents=input['history'] + # ) + # } + # ) + + retrieval_chain = context_chain | context_reranker #| history_chain | history_reranker else: - retrieval_chain = context_chain | history_chain + retrieval_chain = context_chain #| history_chain # Handling Retrieval failure docs = retrieval_chain.invoke({"input": query}, config={"callbacks": [self.cb_handler]}) @@ -265,7 +282,7 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene yield chunk resp_str += chunk - self.save_memory_and_get_output({"input": query, "output": resp_str}, convstore) + #self.save_memory_and_get_output({"input": query, "output": resp_str}, convstore) return chain.stream(query, config={"callbacks": [self.cb_handler]}) diff --git a/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml b/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml index 5e63a6443..ee8147637 100644 --- a/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml +++ b/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml @@ -15,8 +15,6 @@ multi_turn_rag_template: | You are a document chatbot. Help the user as they ask questions about documents. User message just asked: {input}\n\n For this, we have retrieved the following potentially-useful info: - Conversation History Retrieved: - {history}\n\n Document Retrieved: {context}\n\n Answer only from retrieved data. Make your response conversational. diff --git a/RAG/src/rag_playground/requirements.txt b/RAG/src/rag_playground/requirements.txt index c94be2174..e8d22682e 100644 --- a/RAG/src/rag_playground/requirements.txt +++ b/RAG/src/rag_playground/requirements.txt @@ -1,6 +1,6 @@ PyYAML==6.0.1 dataclass-wizard==0.22.3 -gradio==4.13.0 +gradio==4.43.0 jinja2==3.1.3 numpy==1.26.4 opentelemetry-api==1.23.0