|
27 | 27 | from peewee import fn |
28 | 28 |
|
29 | 29 | from api import settings |
| 30 | +from api.constants import IMG_BASE64_PREFIX |
30 | 31 | from api.db import FileType, LLMType, ParserType, StatusEnum, TaskStatus, UserTenantRole |
31 | 32 | from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTenant |
32 | 33 | from api.db.db_utils import bulk_insert_into_db |
@@ -147,7 +148,26 @@ def insert(cls, doc): |
147 | 148 | def remove_document(cls, doc, tenant_id): |
148 | 149 | cls.clear_chunk_num(doc.id) |
149 | 150 | try: |
| 151 | + page = 0 |
| 152 | + page_size = 1000 |
| 153 | + all_chunk_ids = [] |
| 154 | + while True: |
| 155 | + chunks = settings.docStoreConn.search(["img_id"], [], {"doc_id": doc.id}, [], OrderByExpr(), |
| 156 | + page * page_size, page_size, search.index_name(tenant_id), |
| 157 | + [doc.kb_id]) |
| 158 | + chunk_ids = settings.docStoreConn.getChunkIds(chunks) |
| 159 | + if not chunk_ids: |
| 160 | + break |
| 161 | + all_chunk_ids.extend(chunk_ids) |
| 162 | + page += 1 |
| 163 | + for cid in all_chunk_ids: |
| 164 | + if STORAGE_IMPL.obj_exist(doc.kb_id, cid): |
| 165 | + STORAGE_IMPL.rm(doc.kb_id, cid) |
| 166 | + if doc.thumbnail and not doc.thumbnail.startswith(IMG_BASE64_PREFIX): |
| 167 | + if STORAGE_IMPL.obj_exist(doc.kb_id, doc.thumbnail): |
| 168 | + STORAGE_IMPL.rm(doc.kb_id, doc.thumbnail) |
150 | 169 | settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id) |
| 170 | + |
151 | 171 | graph_source = settings.docStoreConn.getFields( |
152 | 172 | settings.docStoreConn.search(["source_id"], [], {"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]}, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [doc.kb_id]), ["source_id"] |
153 | 173 | ) |
|
0 commit comments