
Commit 0770865

Author: hzy@singhand.com (committed)
Merge branch 'main' into feature/support-gunicorn-test3
2 parents: 7d953b6 + ec60b32; commit: 0770865

File tree: 125 files changed, +9740 / -627 lines

Note: this is a large commit; only a subset of the 125 changed files is shown below.

agent/component/categorize.py

Lines changed: 6 additions & 2 deletions
@@ -99,9 +99,13 @@ def _run(self, history, **kwargs):
         # If a category is found, return the category with the highest count.
         if any(category_counts.values()):
             max_category = max(category_counts.items(), key=lambda x: x[1])
-            return Categorize.be_output(self._param.category_description[max_category[0]]["to"])
+            res = Categorize.be_output(self._param.category_description[max_category[0]]["to"])
+            self.set_output(res)
+            return res
 
-        return Categorize.be_output(list(self._param.category_description.items())[-1][1]["to"])
+        res = Categorize.be_output(list(self._param.category_description.items())[-1][1]["to"])
+        self.set_output(res)
+        return res
 
     def debug(self, **kwargs):
         df = self._run([], **kwargs)
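
The change above makes the categorize component cache its routing decision via set_output() before returning it. A minimal sketch of that cache-then-return pattern, using a hypothetical class rather than the project's actual component base:

    # Hypothetical illustration, not RAGFlow's component API.
    class ComponentSketch:
        def set_output(self, value):
            # Remember the last produced result so other parts of the
            # pipeline can inspect it without re-running the component.
            self._output = value

        def get_output(self):
            return getattr(self, "_output", None)

        def run(self, routed_to):
            res = {"to": routed_to}   # stand-in for Categorize.be_output(...)
            self.set_output(res)      # cache first ...
            return res                # ... then return the same object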

api/apps/conversation_app.py

Lines changed: 3 additions & 2 deletions
@@ -42,6 +42,7 @@ def set_conversation():
     conv_id = req.get("conversation_id")
     is_new = req.get("is_new")
     name = req.get("name", "New conversation")
+    req["user_id"] = current_user.id
 
     if len(name) > 255:
         name = name[0:255]
@@ -64,7 +65,7 @@ def set_conversation():
         e, dia = DialogService.get_by_id(req["dialog_id"])
         if not e:
             return get_data_error_result(message="Dialog not found")
-        conv = {"id": conv_id, "dialog_id": req["dialog_id"], "name": name, "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}]}
+        conv = {"id": conv_id, "dialog_id": req["dialog_id"], "name": name, "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}],"user_id": current_user.id}
         ConversationService.save(**conv)
         return get_json_result(data=conv)
     except Exception as e:
@@ -248,7 +249,7 @@ def stream():
         else:
             answer = None
             for ans in chat(dia, msg, **req):
-                answer = structure_answer(conv, ans, message_id, req["conversation_id"])
+                answer = structure_answer(conv, ans, message_id, conv.id)
                 ConversationService.update_by_id(conv.id, conv.to_dict())
                 break
             return get_json_result(data=answer)
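
Two effects of this change: each saved conversation now records its owner in user_id, and structure_answer receives conv.id (the id of the conversation actually loaded or created) rather than whatever the request carried. A hedged usage sketch of the new field, assuming ConversationService inherits the generic query helper shared by the services (not confirmed here):

    # Hypothetical sketch; field name taken from the diff above.
    def list_own_conversations(user_id):
        return ConversationService.query(user_id=user_id)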

api/apps/dialog_app.py

Lines changed: 4 additions & 26 deletions
@@ -28,6 +28,7 @@
 
 
 @manager.route('/set', methods=['POST'])  # noqa: F821
+@validate_request("prompt_config")
 @login_required
 def set_dialog():
     req = request.json
@@ -43,33 +44,10 @@ def set_dialog():
     similarity_threshold = req.get("similarity_threshold", 0.1)
     vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
     llm_setting = req.get("llm_setting", {})
-    default_prompt_with_dataset = {
-        "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
-      以下是知识库:
-      {knowledge}
-      以上是知识库。""",
-        "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
-        "parameters": [
-            {"key": "knowledge", "optional": False}
-        ],
-        "empty_response": "Sorry! 知识库中未找到相关内容!"
-    }
-    default_prompt_no_dataset = {
-        "system": """You are a helpful assistant.""",
-        "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
-        "parameters": [
-
-        ],
-        "empty_response": ""
-    }
-    prompt_config = req.get("prompt_config", default_prompt_with_dataset)
-
-    if not prompt_config["system"]:
-        prompt_config["system"] = default_prompt_with_dataset["system"]
+    prompt_config = req["prompt_config"]
 
-    if not req.get("kb_ids", []):
-        if prompt_config['system'] == default_prompt_with_dataset['system'] or "{knowledge}" in prompt_config['system']:
-            prompt_config = default_prompt_no_dataset
+    if not req.get("kb_ids", []) and not prompt_config.get("tavily_api_key") and "{knowledge}" in prompt_config['system']:
+        return get_data_error_result(message="Please remove `{knowledge}` in system prompt since no knowledge base/Tavily used here.")
 
     for p in prompt_config["parameters"]:
         if p["optional"]:
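
The endpoint no longer injects hard-coded default prompts: prompt_config is now required (via @validate_request), and a system prompt that references {knowledge} is rejected when there is nothing to fill it with. A standalone restatement of that rule (hypothetical helper, not the endpoint's actual code):

    def knowledge_placeholder_is_dangling(req, prompt_config):
        # True when the prompt expects {knowledge} but neither a knowledge
        # base nor a Tavily key is configured to supply it.
        has_kb = bool(req.get("kb_ids", []))
        has_tavily = bool(prompt_config.get("tavily_api_key"))
        return not has_kb and not has_tavily and "{knowledge}" in prompt_config["system"]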

api/apps/kb_app.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def create():
         return get_data_error_result(message="Dataset name must be string.")
     if dataset_name == "":
         return get_data_error_result(message="Dataset name can't be empty.")
-    if len(dataset_name) >= DATASET_NAME_LIMIT:
+    if len(dataset_name.encode("utf-8")) >= DATASET_NAME_LIMIT:
         return get_data_error_result(
             message=f"Dataset name length is {len(dataset_name)} which is large than {DATASET_NAME_LIMIT}")
 
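
The dataset-name limit is now measured in encoded bytes rather than characters, which matters for multi-byte names. A quick illustration:

    name = "知识库"                          # 3 characters ...
    assert len(name) == 3
    assert len(name.encode("utf-8")) == 9    # ... but 9 UTF-8 bytes, which is
                                             # what the new check compares to
                                             # DATASET_NAME_LIMIT (128)

Note that the error message still reports len(dataset_name) (characters), so it can understate the measured length for non-ASCII names.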

api/apps/sdk/chat.py

Lines changed: 4 additions & 2 deletions
@@ -173,8 +173,10 @@ def update(tenant_id, chat_id):
     if llm:
         if "model_name" in llm:
             req["llm_id"] = llm.pop("model_name")
-            if not TenantLLMService.query(tenant_id=tenant_id, llm_name=req["llm_id"], model_type="chat"):
-                return get_error_data_result(f"`model_name` {req.get('llm_id')} doesn't exist")
+        if req.get("llm_id") is not None:
+            llm_name, llm_factory = TenantLLMService.split_model_name_and_factory(req["llm_id"])
+            if not TenantLLMService.query(tenant_id=tenant_id, llm_name=llm_name, llm_factory=llm_factory, model_type="chat"):
+                return get_error_data_result(f"`model_name` {req.get('llm_id')} doesn't exist")
         req["llm_setting"] = req.pop("llm")
     e, tenant = TenantService.get_by_id(tenant_id)
     if not e:
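
The update now splits the factory out of the model id before querying, so an id that embeds its provider no longer fails the existence check. The real parsing lives in TenantLLMService.split_model_name_and_factory; the following is only a hedged guess at the convention the helper's name implies (model id optionally suffixed with "@factory"), not the confirmed implementation:

    # Assumed convention only.
    def split_model_name_and_factory(model_id):
        if "@" in model_id:
            name, factory = model_id.rsplit("@", 1)
            return name, factory
        return model_id, None   # factory unknown; the query falls back to the name alone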

api/apps/sdk/dify_retrieval.py

Lines changed: 3 additions & 1 deletion
@@ -16,6 +16,7 @@
 from flask import request, jsonify
 
 from api.db import LLMType
+from api.db.services.document_service import DocumentService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
 from api import settings
@@ -70,12 +71,13 @@ def retrieval(tenant_id):
 
     records = []
     for c in ranks["chunks"]:
+        e, doc = DocumentService.get_by_id( c["doc_id"])
         c.pop("vector", None)
        records.append({
             "content": c["content_with_weight"],
             "score": c["similarity"],
             "title": c["docnm_kwd"],
-            "metadata": {}
+            "metadata": doc.meta_fields
         })
 
     return jsonify({"records": records})
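
Each returned record now carries the source document's meta_fields instead of an empty dict. An illustrative record shape (keys come from the diff above; the values are invented):

    record = {
        "content": "example chunk text",
        "score": 0.83,
        "title": "manual.pdf",
        "metadata": {"author": "someone", "department": "support"},  # doc.meta_fields, previously {}
    }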

api/constants.py

Lines changed: 1 addition & 0 deletions
@@ -25,3 +25,4 @@
 REQUEST_MAX_WAIT_SEC = 300
 
 DATASET_NAME_LIMIT = 128
+FILE_NAME_LEN_LIMIT = 256

api/db/services/canvas_service.py

Lines changed: 13 additions & 0 deletions
@@ -173,6 +173,19 @@ def completion(tenant_id, agent_id, question, session_id=None, stream=True, **kw
         conv.reference = []
         conv.reference.append({"chunks": [], "doc_aggs": []})
 
+    kwargs_changed = False
+    if kwargs:
+        query = canvas.get_preset_param()
+        if query:
+            for ele in query:
+                if ele["key"] in kwargs:
+                    if ele["value"] != kwargs[ele["key"]]:
+                        ele["value"] = kwargs[ele["key"]]
+                        kwargs_changed = True
+    if kwargs_changed:
+        conv.dsl = json.loads(str(canvas))
+        API4ConversationService.update_by_id(session_id, {"dsl": conv.dsl})
+
     final_ans = {"reference": [], "content": ""}
     if stream:
         try:
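
The new block copies caller-supplied values into the canvas's preset parameters and persists the updated DSL only when something actually changed. The same override logic as a standalone sketch (hypothetical helper name, not project code):

    def apply_preset_overrides(preset_params, overrides):
        # preset_params: list of {"key": ..., "value": ...} dicts, as returned
        # by canvas.get_preset_param() in the diff above.
        changed = False
        for ele in preset_params:
            key = ele["key"]
            if key in overrides and ele["value"] != overrides[key]:
                ele["value"] = overrides[key]
                changed = True
        return changed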

api/db/services/common_service.py

Lines changed: 1 addition & 1 deletion
@@ -254,7 +254,7 @@ def delete_by_id(cls, pid):
         # Returns:
         #     Number of records deleted
         return cls.model.delete().where(cls.model.id == pid).execute()
-
+
     @classmethod
     @DB.connection_context()
     def delete_by_ids(cls, pids):

(Whitespace-only change on the blank line.)

api/db/services/document_service.py

Lines changed: 20 additions & 0 deletions
@@ -27,6 +27,7 @@
 from peewee import fn
 
 from api import settings
+from api.constants import IMG_BASE64_PREFIX
 from api.db import FileType, LLMType, ParserType, StatusEnum, TaskStatus, UserTenantRole
 from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTenant
 from api.db.db_utils import bulk_insert_into_db
@@ -147,7 +148,26 @@ def insert(cls, doc):
     def remove_document(cls, doc, tenant_id):
         cls.clear_chunk_num(doc.id)
         try:
+            page = 0
+            page_size = 1000
+            all_chunk_ids = []
+            while True:
+                chunks = settings.docStoreConn.search(["img_id"], [], {"doc_id": doc.id}, [], OrderByExpr(),
+                                                      page * page_size, page_size, search.index_name(tenant_id),
+                                                      [doc.kb_id])
+                chunk_ids = settings.docStoreConn.getChunkIds(chunks)
+                if not chunk_ids:
+                    break
+                all_chunk_ids.extend(chunk_ids)
+                page += 1
+            for cid in all_chunk_ids:
+                if STORAGE_IMPL.obj_exist(doc.kb_id, cid):
+                    STORAGE_IMPL.rm(doc.kb_id, cid)
+            if doc.thumbnail and not doc.thumbnail.startswith(IMG_BASE64_PREFIX):
+                if STORAGE_IMPL.obj_exist(doc.kb_id, doc.thumbnail):
+                    STORAGE_IMPL.rm(doc.kb_id, doc.thumbnail)
             settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
+
             graph_source = settings.docStoreConn.getFields(
                 settings.docStoreConn.search(["source_id"], [], {"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]}, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [doc.kb_id]), ["source_id"]
             )
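
Before deleting a document, the service now pages through the doc store to collect every chunk id and removes the associated stored images (plus the thumbnail, unless it is an inline base64 image). The paging pattern, stripped of the storage-specific calls (generic sketch; names are hypothetical):

    def collect_all_ids(fetch_page, page_size=1000):
        # fetch_page(offset, limit) stands in for the docStoreConn.search +
        # getChunkIds pair used above; it should return [] when exhausted.
        page, all_ids = 0, []
        while True:
            ids = fetch_page(page * page_size, page_size)
            if not ids:
                break
            all_ids.extend(ids)
            page += 1
        return all_ids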
