forked from SimranAnand1/LLMtutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
108 lines (93 loc) · 3.97 KB
/
Copy pathmain.py
File metadata and controls
108 lines (93 loc) · 3.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import arxiv,os,glob
import streamlit as st
from langchain_community.document_loaders import DirectoryLoader,PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI,OpenAIEmbeddings
from langchain.schema.runnable import RunnableParallel,RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate
# Folder where the arXiv PDFs for the currently selected topic are stored.
path = "C:\\Users\\welcome\\OneDrive\\Documents\\GitHub\\LLMtutorial\\tutorial41\\output\\"

# Seed the per-session keys only when they are absent, so values written on
# an earlier run of the script are not overwritten on a rerun.
for _state_key, _initial_value in (
    ("last_selected_option", None),
    ("docs_processed", False),
    ("retriever", None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Chat model used to answer questions (temperature 0.0, at most 1024 tokens).
llm = ChatOpenAI(model="gpt-4", temperature=0.0, max_tokens=1024)

# Prompt text with two placeholders: {context} and {question}.
template = """
Answer the question based only on the following context:
{context}
Question:{question}
"""
prompt = ChatPromptTemplate.from_template(template)
def download_parse(selected_option):
    """Download arXiv PDFs matching a topic into the output folder.

    Searches arXiv for ``selected_option`` (at most 15 results, sorted by
    submission date) and saves each PDF under the module-level ``path``.
    A paper that fails to download is reported on stdout and skipped so
    the remaining papers are still fetched.

    Args:
        selected_option: Search query sent to arXiv.
    """
    # Make sure the target folder exists; without it every download would
    # fail and the failures were previously reported without their cause.
    os.makedirs(path, exist_ok=True)

    client = arxiv.Client()
    search = arxiv.Search(
        query=selected_option,
        max_results=15,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )
    for result in client.results(search):
        try:
            result.download_pdf(dirpath=path)
        except Exception as e:
            # Deliberately best-effort: one bad paper must not abort the
            # batch, but say which paper failed and why.
            print(f"failed to download {result.entry_id}: {e}")
def setoutput(input_text, retriever):
    """Answer ``input_text`` using context supplied by ``retriever``.

    The input is given to ``retriever`` to produce the ``context`` value
    and is also passed through unchanged as the ``question`` value. Both
    are fed to the module-level ``prompt``, then to ``llm``, and the model
    output is run through ``StrOutputParser``.

    Args:
        input_text: The user's question.
        retriever: Runnable used to fetch context for the question.

    Returns:
        The parsed output of the chain.
    """
    gather_inputs = RunnableParallel(
        {"context": retriever, "question": RunnablePassthrough()}
    )
    answer_chain = gather_inputs | prompt | llm | StrOutputParser()
    return answer_chain.invoke(input_text)
def _collect_text(docs):
    """Return the non-blank lines of all loaded pages, joined by newlines.

    Lines are kept separated (rather than glued together) so that the last
    word of one line does not fuse with the first word of the next before
    the text is chunked and embedded.
    """
    kept_lines = []
    for doc in docs:
        kept_lines.extend(
            line for line in doc.page_content.splitlines() if line.strip()
        )
    return "\n".join(kept_lines)


def _build_retriever(topic):
    """Download papers for ``topic`` and index them; return a retriever.

    Clears the output folder, downloads fresh PDFs, loads and chunks their
    text, builds a FAISS index (also saved locally as ``research_index``)
    and returns a retriever over it. Returns ``None`` when no text could be
    obtained, so the caller can report it instead of failing while indexing.
    """
    # Remove files left over from the previously selected topic.
    for stale_file in glob.glob(path + "*.*"):
        os.remove(stale_file)

    download_parse(topic)

    loader = DirectoryLoader(path=path, glob="./*.pdf", loader_cls=PyPDFLoader)
    try:
        docs = loader.load()
    except Exception as e:
        print(f"error load docs{e}")
        docs = []

    full_text = _collect_text(docs)
    if not full_text:
        return None

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
    doc_chunks = text_splitter.create_documents([full_text])
    if not doc_chunks:
        return None

    vs = FAISS.from_documents(documents=doc_chunks, embedding=OpenAIEmbeddings())
    vs.save_local("research_index")
    return vs.as_retriever()


st.title("Multi Specialty Research Assistant")
col1, col2 = st.columns(2)
with col1:
    st.header("SELECT YOUR DOMAIN FOR RESEARCH")
    options = [
        "healthcare",
        "mathematics",
        "physics",
        "chemistry",
        "AI",
        "computer science",
        "space research",
        "quantum computing",
    ]
    selected_option = st.selectbox(
        "choose your domain for research", options, index=0, key="select_option"
    )

    # A change of domain invalidates the documents indexed so far.
    if selected_option and selected_option != st.session_state["last_selected_option"]:
        st.session_state["docs_processed"] = False
        st.session_state["last_selected_option"] = selected_option

    if selected_option and not st.session_state["docs_processed"]:
        retriever = _build_retriever(selected_option)
        # Always overwrite the stored retriever: on failure this clears the
        # one built for the previous domain so it cannot answer questions
        # about the new one.
        st.session_state["retriever"] = retriever
        if retriever is None:
            # docs_processed stays False, so the next rerun tries again.
            st.warning(
                "No documents could be downloaded or parsed for this domain; "
                "please try again or choose another domain"
            )
        else:
            st.session_state["docs_processed"] = True
            st.success("Documents are processed and stored into vector db")

    input_text = st.text_area(
        "User Question Section",
        f"ask question related to topic {selected_option}",
        key="input_text",
    )
    if st.button("Submit", type="primary"):
        if st.session_state["retriever"] is not None:
            result = setoutput(input_text, st.session_state["retriever"])
            with col2:
                st.header("OUTPUT SECTION")
                st.write(result)
        else:
            # Previously a click with no index rendered nothing at all.
            with col2:
                st.header("OUTPUT SECTION")
                st.warning("No documents are indexed yet, so the question cannot be answered")
    else:
        with col2:
            st.header("OUTPUT SECTION")
            st.write("your output will be generated by AI once you hit the submit button")