Skip to content

Commit af6139f

Browse files
tahierhussain, nehabagdia, jaseemjaskp, jaags-dev
authored
Feature/prompt studio document manager (Zipstack#78)
* Implemented re-index
* Fixed data persistence issue in the summarize modal
* Reverted to the default background color of the PDF viewer
* UI Improvements in Output Analyzer
* Fixed issue with spaces in the prompt/notes card
* Added loader to the submit button
* Revert "FIX: Prompt Studio Bug Fixed (Zipstack#65)"
  This reverts commit 35429b1.
* Revert "Revert "FIX: Prompt Studio Bug Fixed (Zipstack#65)""
  This reverts commit 1e2921d.
* Revert "FIX: Prompt Studio Bug Fixed (Zipstack#65)"
  This reverts commit 35429b1.
* Backend changes related to document manager
* UI changes related to document manager
* Index Manager and Document Manager changes
* Code efficiency improvement in the document manager BE
* UI changes to support document manager changes
* implemented new design
* Added API support for index manager
* FE changes for showing the indexing status in the Manage Documents table
* fixed prompt list not updated after adding new prompt
* UI bug fixes and improvements
* UI bug fixes
* Optimized migrations
* Modification in migrations
* Code quality improvement

---------

Co-authored-by: Neha <115609453+nehabagdia@users.noreply.github.com>
Co-authored-by: Jaseem Jas <89440144+jaseemjaskp@users.noreply.github.com>
Co-authored-by: jagadeeswaran-zipstack <jagadeeswaran@zipstack.com>
1 parent 80d4870 commit af6139f

65 files changed

Lines changed: 1642 additions & 720 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

backend/backend/settings/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ def get_required_setting(
210210
"prompt_studio.prompt_studio_core",
211211
"prompt_studio.prompt_studio_registry",
212212
"prompt_studio.prompt_studio_output_manager",
213+
"prompt_studio.prompt_studio_document_manager",
214+
"prompt_studio.prompt_studio_index_manager",
213215
)
214216

215217
INSTALLED_APPS = list(SHARED_APPS) + [

backend/backend/urls.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,12 @@
5050
UrlPathConstants.PROMPT_STUDIO,
5151
include("prompt_studio.prompt_studio_output_manager.urls"),
5252
),
53+
path(
54+
UrlPathConstants.PROMPT_STUDIO,
55+
include("prompt_studio.prompt_studio_document_manager.urls"),
56+
),
57+
path(
58+
UrlPathConstants.PROMPT_STUDIO,
59+
include("prompt_studio.prompt_studio_index_manager.urls"),
60+
),
5361
]

backend/file_management/constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,9 @@ class FileInformationKey:
66
FILE_UPLOAD_MAX_SIZE = 100 * 1024 * 1024
77
FILE_UPLOAD_ALLOWED_EXT = ["pdf"]
88
FILE_UPLOAD_ALLOWED_MIME = ["application/pdf"]
9+
10+
class FileViewTypes:
11+
ORIGINAL = "ORIGINAL"
12+
EXTRACT = "EXTRACT"
13+
SUMMARIZE = "SUMMARIZE"
14+

backend/file_management/file_management_helper.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,8 @@ def fetch_file_contents(
194194

195195
elif file_content_type == "text/plain":
196196
with fs.open(file_path, "r") as file:
197-
FileManagerHelper.logger.info(f"Reading text file: {file_path}")
197+
FileManagerHelper.logger.info(
198+
f"Reading text file: {file_path}")
198199
text_content = file.read()
199200
return text_content
200201
else:

backend/file_management/serializer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,9 @@ class FileUploadIdeSerializer(serializers.Serializer):
5050

5151

5252
class FileInfoIdeSerializer(serializers.Serializer):
53-
file_name = serializers.CharField()
53+
document_id = serializers.CharField()
5454
tool_id = serializers.CharField()
55+
view_type = serializers.CharField(required=False)
5556

5657

5758
class FileListRequestIdeSerializer(serializers.Serializer):

backend/file_management/views.py

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from connector.models import ConnectorInstance
66
from django.http import HttpRequest
7+
from file_management.constants import FileViewTypes
78
from file_management.exceptions import (
89
ConnectorInstanceNotFound,
910
ConnectorOAuthError,
@@ -20,10 +21,15 @@
2021
FileUploadSerializer,
2122
)
2223
from oauth2client.client import HttpAccessTokenRefreshError
24+
from prompt_studio.prompt_studio_document_manager.models import DocumentManager
25+
from prompt_studio.prompt_studio_document_manager.prompt_studio_document_helper import (
26+
PromptStudioDocumentHelper,
27+
)
2328
from rest_framework import serializers, status, viewsets
2429
from rest_framework.decorators import action
2530
from rest_framework.response import Response
2631
from rest_framework.versioning import URLPathVersioning
32+
2733
from unstract.connectors.exceptions import ConnectorError
2834
from unstract.connectors.filesystems.local_storage.local_storage import (
2935
LocalStorageFS,
@@ -132,8 +138,21 @@ def upload_for_ide(self, request: HttpRequest) -> Response:
132138
tool_id=tool_id,
133139
)
134140
file_system = LocalStorageFS(settings={"path": file_path})
141+
142+
documents = []
135143
for uploaded_file in uploaded_files:
136144
file_name = uploaded_file.name
145+
146+
# Create a record in the db for the file
147+
document = PromptStudioDocumentHelper.create(
148+
tool_id=tool_id, document_name=file_name)
149+
# Create a dictionary to store document data
150+
doc = {
151+
"document_id": document.document_id,
152+
"document_name": document.document_name,
153+
"tool": document.tool.tool_id
154+
}
155+
# Store file
137156
logger.info(
138157
f"Uploading file: {file_name}"
139158
if file_name
@@ -145,14 +164,31 @@ def upload_for_ide(self, request: HttpRequest) -> Response:
145164
uploaded_file,
146165
file_name,
147166
)
148-
return Response({"message": "Files are uploaded successfully!"})
167+
documents.append(doc)
168+
return Response({"data": documents})
149169

150170
@action(detail=True, methods=["get"])
151171
def fetch_contents_ide(self, request: HttpRequest) -> Response:
152172
serializer = FileInfoIdeSerializer(data=request.GET)
153173
serializer.is_valid(raise_exception=True)
154-
file_name: str = serializer.validated_data.get("file_name")
174+
document_id: str = serializer.validated_data.get("document_id")
175+
document: DocumentManager = DocumentManager.objects.get(pk=document_id)
176+
file_name: str = document.document_name
155177
tool_id: str = serializer.validated_data.get("tool_id")
178+
view_type: str = serializer.validated_data.get("view_type")
179+
180+
filename_without_extension = file_name.rsplit('.', 1)[0]
181+
if view_type == FileViewTypes.EXTRACT:
182+
file_name = (
183+
f"{FileViewTypes.EXTRACT.lower()}/"
184+
f"{filename_without_extension}.txt"
185+
)
186+
if view_type == FileViewTypes.SUMMARIZE:
187+
file_name = (
188+
f"{FileViewTypes.SUMMARIZE.lower()}/"
189+
f"{filename_without_extension}.txt"
190+
)
191+
156192
file_path = (
157193
file_path
158194
) = FileManagerHelper.handle_sub_directory_for_tenants(
@@ -165,7 +201,8 @@ def fetch_contents_ide(self, request: HttpRequest) -> Response:
165201
if not file_path.endswith("/"):
166202
file_path += "/"
167203
file_path += file_name
168-
contents = FileManagerHelper.fetch_file_contents(file_system, file_path)
204+
contents = FileManagerHelper.fetch_file_contents(
205+
file_system, file_path)
169206
return Response({"data": contents}, status=status.HTTP_200_OK)
170207

171208
@action(detail=True, methods=["get"])
@@ -196,7 +233,9 @@ def list_ide(self, request: HttpRequest) -> Response:
196233
def delete(self, request: HttpRequest) -> Response:
197234
serializer = FileInfoIdeSerializer(data=request.GET)
198235
serializer.is_valid(raise_exception=True)
199-
file_name: str = serializer.validated_data.get("file_name")
236+
document_id: str = serializer.validated_data.get("document_id")
237+
document: DocumentManager = DocumentManager.objects.get(pk=document_id)
238+
file_name: str = document.document_name
200239
tool_id: str = serializer.validated_data.get("tool_id")
201240
file_path = FileManagerHelper.handle_sub_directory_for_tenants(
202241
request.org_id,
@@ -205,13 +244,12 @@ def delete(self, request: HttpRequest) -> Response:
205244
tool_id=tool_id,
206245
)
207246
path = file_path
208-
if not file_name:
209-
return Response(
210-
{"data": "File deletion failed. File name is mandatory"},
211-
status=status.HTTP_400_BAD_REQUEST,
212-
)
213247
file_system = LocalStorageFS(settings={"path": path})
214248
try:
249+
# Delete the document record
250+
document.delete()
251+
252+
# Delete the file
215253
FileManagerHelper.delete_file(file_system, path, file_name)
216254
return Response(
217255
{"data": "File deleted succesfully."},

backend/prompt_studio/prompt_profile_manager/migrations/0008_profilemanager_migration.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@ class Migration(migrations.Migration):
1111
"prompt_profile_manager",
1212
"0007_profilemanager_is_default_and_more",
1313
),
14+
(
15+
"prompt_studio",
16+
"0006_alter_toolstudioprompt_prompt_key_and_more",
17+
),
18+
(
19+
"prompt_studio_core",
20+
"0007_remove_customtool_default_profile_and_more",
21+
)
1422
]
1523

1624
def MigrateProfileManager(apps: Any, schema_editor: Any) -> None:

backend/prompt_studio/prompt_profile_manager/views.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def get_queryset(self) -> Optional[QuerySet]:
4545
def create(
4646
self, request: HttpRequest, *args: tuple[Any], **kwargs: dict[str, Any]
4747
) -> Response:
48-
serializer = self.get_serializer(data=request.data)
48+
serializer: ProfileManagerSerializer = self.get_serializer(data=request.data)
4949
# Overriding default exception behaviour
5050
# TO DO : Handle model related exceptions.
5151
serializer.is_valid(raise_exception=True)

backend/prompt_studio/prompt_studio_core/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class ToolStudioPromptKeys:
7777
EVAL_SETTINGS_EXCLUDE_FAILED = "exclude_failed"
7878
SUMMARIZE = "summarize"
7979
SUMMARIZED_RESULT = "summarized_result"
80+
DOCUMENT_ID = "document_id"
8081

8182

8283
class LogLevels:

backend/prompt_studio/prompt_studio_core/prompt_studio_helper.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
from prompt_studio.prompt_studio_core.prompt_ide_base_tool import (
2424
PromptIdeBaseTool,
2525
)
26+
from prompt_studio.prompt_studio_index_manager.prompt_studio_index_helper import (
27+
PromptStudioIndexHelper,
28+
)
2629
from unstract.sdk.constants import LogLevel
2730
from unstract.sdk.index import ToolIndex
2831
from unstract.sdk.prompt import PromptTool
@@ -85,6 +88,7 @@ def index_document(
8588
file_name: str,
8689
org_id: str,
8790
user_id: str,
91+
document_id: str,
8892
is_summary: bool = False,
8993
) -> Any:
9094
"""Method to index a document.
@@ -149,6 +153,7 @@ def index_document(
149153
tool_id=tool_id,
150154
file_name=file_path,
151155
org_id=org_id,
156+
document_id=document_id,
152157
is_summary=is_summary,
153158
)
154159
logger.info(f"Indexing done sucessfully for {file_name}")
@@ -164,7 +169,12 @@ def index_document(
164169

165170
@staticmethod
166171
def prompt_responder(
167-
id: str, tool_id: str, file_name: str, org_id: str, user_id: str
172+
id: str,
173+
tool_id: str,
174+
file_name: str,
175+
org_id: str,
176+
user_id: str,
177+
document_id: str
168178
) -> Any:
169179
"""Execute chain/single run of the prompts. Makes a call to prompt
170180
service and returns the dict of response.
@@ -217,7 +227,8 @@ def prompt_responder(
217227
),
218228
)
219229
if not prompt_instance:
220-
logger.error(f"Prompt id {id} does not have any data in db")
230+
logger.error(
231+
f"Prompt id {id} does not have any data in db")
221232
raise PromptNotValid()
222233
except Exception as exc:
223234
logger.error(f"Error while fetching prompt {exc}")
@@ -242,7 +253,11 @@ def prompt_responder(
242253
)
243254
logger.info(f"Invoking prompt service for prompt id {id}")
244255
response = PromptStudioHelper._fetch_response(
245-
path=file_path, tool=tool, prompts=prompts, org_id=org_id
256+
path=file_path,
257+
tool=tool,
258+
prompts=prompts,
259+
org_id=org_id,
260+
document_id=document_id
246261
)
247262
stream_log.publish(
248263
tool.tool_id,
@@ -262,6 +277,7 @@ def _fetch_response(
262277
path: str,
263278
prompts: list[ToolStudioPrompt],
264279
org_id: str,
280+
document_id: str
265281
) -> Any:
266282
"""Utility function to invoke prompt service. Used internally.
267283
@@ -302,6 +318,7 @@ def _fetch_response(
302318
file_name=path,
303319
tool_id=str(tool.tool_id),
304320
org_id=org_id,
321+
document_id=document_id,
305322
is_summary=tool.summarize_as_source,
306323
)
307324

@@ -382,6 +399,7 @@ def dynamic_indexer(
382399
tool_id: str,
383400
file_name: str,
384401
org_id: str,
402+
document_id: str,
385403
is_summary: bool = False,
386404
) -> str:
387405
try:
@@ -400,7 +418,7 @@ def dynamic_indexer(
400418
extract_file_path = os.path.join(
401419
directory, "extract", os.path.splitext(filename)[0] + ".txt"
402420
)
403-
return str(
421+
doc_id = str(
404422
tool_index.index_file(
405423
tool_id=tool_id,
406424
embedding_type=embedding_model,
@@ -414,3 +432,12 @@ def dynamic_indexer(
414432
output_file_path=extract_file_path,
415433
)
416434
)
435+
436+
PromptStudioIndexHelper.handle_index_manager(
437+
document_id=document_id,
438+
is_summary=is_summary,
439+
profile_manager=profile_manager,
440+
doc_id=doc_id,
441+
)
442+
443+
return doc_id

0 commit comments

Comments
 (0)