@@ -43,7 +43,6 @@ from core.model_runtime.errors.invoke import InvokeAuthorizationError
|
||||
from core.plugin.impl.exc import PluginDaemonClientSideError
|
||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from fields.document_fields import (
|
||||
dataset_and_document_fields,
|
||||
document_fields,
|
||||
@@ -54,8 +53,6 @@ from libs.login import login_required
|
||||
from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile
|
||||
from services.dataset_service import DatasetService, DocumentService
|
||||
from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig
|
||||
from tasks.add_document_to_index_task import add_document_to_index_task
|
||||
from tasks.remove_document_from_index_task import remove_document_from_index_task
|
||||
|
||||
|
||||
class DocumentResource(Resource):
|
||||
@@ -862,77 +859,16 @@ class DocumentStatusApi(DocumentResource):
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
document_ids = request.args.getlist("document_id")
|
||||
for document_id in document_ids:
|
||||
document = self.get_document(dataset_id, document_id)
|
||||
|
||||
indexing_cache_key = "document_{}_indexing".format(document.id)
|
||||
cache_result = redis_client.get(indexing_cache_key)
|
||||
if cache_result is not None:
|
||||
raise InvalidActionError(f"Document:{document.name} is being indexed, please try again later")
|
||||
try:
|
||||
DocumentService.batch_update_document_status(dataset, document_ids, action, current_user)
|
||||
except services.errors.document.DocumentIndexingError as e:
|
||||
raise InvalidActionError(str(e))
|
||||
except ValueError as e:
|
||||
raise InvalidActionError(str(e))
|
||||
except NotFound as e:
|
||||
raise NotFound(str(e))
|
||||
|
||||
if action == "enable":
|
||||
if document.enabled:
|
||||
continue
|
||||
document.enabled = True
|
||||
document.disabled_at = None
|
||||
document.disabled_by = None
|
||||
document.updated_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
db.session.commit()
|
||||
|
||||
# Set cache to prevent indexing the same document multiple times
|
||||
redis_client.setex(indexing_cache_key, 600, 1)
|
||||
|
||||
add_document_to_index_task.delay(document_id)
|
||||
|
||||
elif action == "disable":
|
||||
if not document.completed_at or document.indexing_status != "completed":
|
||||
raise InvalidActionError(f"Document: {document.name} is not completed.")
|
||||
if not document.enabled:
|
||||
continue
|
||||
|
||||
document.enabled = False
|
||||
document.disabled_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
document.disabled_by = current_user.id
|
||||
document.updated_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
db.session.commit()
|
||||
|
||||
# Set cache to prevent indexing the same document multiple times
|
||||
redis_client.setex(indexing_cache_key, 600, 1)
|
||||
|
||||
remove_document_from_index_task.delay(document_id)
|
||||
|
||||
elif action == "archive":
|
||||
if document.archived:
|
||||
continue
|
||||
|
||||
document.archived = True
|
||||
document.archived_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
document.archived_by = current_user.id
|
||||
document.updated_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
db.session.commit()
|
||||
|
||||
if document.enabled:
|
||||
# Set cache to prevent indexing the same document multiple times
|
||||
redis_client.setex(indexing_cache_key, 600, 1)
|
||||
|
||||
remove_document_from_index_task.delay(document_id)
|
||||
|
||||
elif action == "un_archive":
|
||||
if not document.archived:
|
||||
continue
|
||||
document.archived = False
|
||||
document.archived_at = None
|
||||
document.archived_by = None
|
||||
document.updated_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
db.session.commit()
|
||||
|
||||
# Set cache to prevent indexing the same document multiple times
|
||||
redis_client.setex(indexing_cache_key, 600, 1)
|
||||
|
||||
add_document_to_index_task.delay(document_id)
|
||||
|
||||
else:
|
||||
raise InvalidActionError()
|
||||
return {"result": "success"}, 200
|
||||
|
||||
|
||||
|
@@ -4,7 +4,7 @@ from werkzeug.exceptions import Forbidden, NotFound
|
||||
|
||||
import services.dataset_service
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError
|
||||
from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError, InvalidActionError
|
||||
from controllers.service_api.wraps import (
|
||||
DatasetApiResource,
|
||||
cloud_edition_billing_rate_limit_check,
|
||||
@@ -17,7 +17,7 @@ from fields.dataset_fields import dataset_detail_fields
|
||||
from fields.tag_fields import tag_fields
|
||||
from libs.login import current_user
|
||||
from models.dataset import Dataset, DatasetPermissionEnum
|
||||
from services.dataset_service import DatasetPermissionService, DatasetService
|
||||
from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService
|
||||
from services.entities.knowledge_entities.knowledge_entities import RetrievalModel
|
||||
from services.tag_service import TagService
|
||||
|
||||
@@ -329,6 +329,56 @@ class DatasetApi(DatasetApiResource):
|
||||
raise DatasetInUseError()
|
||||
|
||||
|
||||
class DocumentStatusApi(DatasetApiResource):
    """Resource for batch document status operations."""

    def patch(self, tenant_id, dataset_id, action):
        """
        Batch update document status.

        Args:
            tenant_id: tenant id
            dataset_id: dataset id
            action: action to perform (enable, disable, archive, un_archive)

        Returns:
            dict: A dictionary with a key 'result' and a value 'success'
            int: HTTP status code 200 indicating that the operation was successful.

        Raises:
            NotFound: If the dataset with the given ID does not exist.
            Forbidden: If the user does not have permission.
            InvalidActionError: If the request body is malformed, or the action
                is invalid or cannot be performed.
        """
        dataset_id_str = str(dataset_id)
        dataset = DatasetService.get_dataset(dataset_id_str)

        if dataset is None:
            raise NotFound("Dataset not found.")

        # Check user's permission
        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        # Check dataset model setting
        DatasetService.check_dataset_model_setting(dataset)

        # Get document IDs from request body. get_json() may return any JSON
        # value (null, array, scalar), so the shape is checked before calling
        # .get() on it; otherwise a malformed body surfaces as an
        # AttributeError instead of a client error.
        data = request.get_json()
        if not isinstance(data, dict):
            raise InvalidActionError("Request body must be a JSON object.")

        # A missing or null "document_ids" keeps the previous no-op behaviour.
        document_ids = data.get("document_ids")
        if document_ids is None:
            document_ids = []
        # A bare string would otherwise be iterated character by character.
        if not isinstance(document_ids, list) or not all(isinstance(doc_id, str) for doc_id in document_ids):
            raise InvalidActionError("document_ids must be a list of strings.")

        try:
            DocumentService.batch_update_document_status(dataset, document_ids, action, current_user)
        except (services.errors.document.DocumentIndexingError, ValueError) as e:
            raise InvalidActionError(str(e))

        return {"result": "success"}, 200
|
||||
|
||||
|
||||
class DatasetTagsApi(DatasetApiResource):
|
||||
@validate_dataset_token
|
||||
@marshal_with(tag_fields)
|
||||
@@ -457,6 +507,7 @@ class DatasetTagsBindingStatusApi(DatasetApiResource):
|
||||
|
||||
api.add_resource(DatasetListApi, "/datasets")
|
||||
api.add_resource(DatasetApi, "/datasets/<uuid:dataset_id>")
|
||||
api.add_resource(DocumentStatusApi, "/datasets/<uuid:dataset_id>/documents/status/<string:action>")
|
||||
api.add_resource(DatasetTagsApi, "/datasets/tags")
|
||||
api.add_resource(DatasetTagBindingApi, "/datasets/tags/binding")
|
||||
api.add_resource(DatasetTagUnbindingApi, "/datasets/tags/unbinding")
|
||||
|
@@ -59,6 +59,7 @@ from services.external_knowledge_service import ExternalDatasetService
|
||||
from services.feature_service import FeatureModel, FeatureService
|
||||
from services.tag_service import TagService
|
||||
from services.vector_service import VectorService
|
||||
from tasks.add_document_to_index_task import add_document_to_index_task
|
||||
from tasks.batch_clean_document_task import batch_clean_document_task
|
||||
from tasks.clean_notion_document_task import clean_notion_document_task
|
||||
from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
|
||||
@@ -70,6 +71,7 @@ from tasks.document_indexing_update_task import document_indexing_update_task
|
||||
from tasks.duplicate_document_indexing_task import duplicate_document_indexing_task
|
||||
from tasks.enable_segments_to_index_task import enable_segments_to_index_task
|
||||
from tasks.recover_document_indexing_task import recover_document_indexing_task
|
||||
from tasks.remove_document_from_index_task import remove_document_from_index_task
|
||||
from tasks.retry_document_indexing_task import retry_document_indexing_task
|
||||
from tasks.sync_website_document_indexing_task import sync_website_document_indexing_task
|
||||
|
||||
@@ -434,7 +436,7 @@ class DatasetService:
|
||||
raise ValueError(ex.description)
|
||||
|
||||
filtered_data["updated_by"] = user.id
|
||||
filtered_data["updated_at"] = datetime.datetime.now()
|
||||
filtered_data["updated_at"] = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
|
||||
|
||||
# update Retrieval model
|
||||
filtered_data["retrieval_model"] = data["retrieval_model"]
|
||||
@@ -1608,6 +1610,99 @@ class DocumentService:
|
||||
if not isinstance(args["process_rule"]["rules"]["segmentation"]["max_tokens"], int):
|
||||
raise ValueError("Process rule segmentation max_tokens is invalid")
|
||||
|
||||
@staticmethod
def batch_update_document_status(dataset: Dataset, document_ids: list[str], action: str, user):
    """
    Batch update document status.

    The batch is validated as a whole before anything is written, so a
    document that fails validation no longer leaves earlier documents in the
    same request already committed and queued for (de)indexing.

    Args:
        dataset (Dataset): The dataset object
        document_ids (list[str]): List of document IDs to update
        action (str): Action to perform (enable, disable, archive, un_archive)
        user: Current user performing the action

    Raises:
        DocumentIndexingError: If document is being indexed or not in correct state
        ValueError: If the action is not one of the supported actions
    """
    if not document_ids:
        return

    # Reject an unknown action up front; previously it was only noticed
    # after a document had been looked up, so it slipped through silently
    # when none of the ids resolved to a document.
    if action not in ("enable", "disable", "archive", "un_archive"):
        raise ValueError(f"Invalid action: {action}")

    # Pass 1: validate every document and collect the ones that need a change.
    pending = []
    seen_ids = set()
    for document_id in document_ids:
        document = DocumentService.get_document(dataset.id, document_id)

        if not document:
            continue

        # A repeated id would otherwise be applied twice (or, as before,
        # trip over the cache key set by its own first occurrence).
        if document.id in seen_ids:
            continue
        seen_ids.add(document.id)

        indexing_cache_key = f"document_{document.id}_indexing"
        cache_result = redis_client.get(indexing_cache_key)
        if cache_result is not None:
            raise DocumentIndexingError(f"Document:{document.name} is being indexed, please try again later")

        if action == "enable":
            if document.enabled:
                continue
        elif action == "disable":
            if not document.completed_at or document.indexing_status != "completed":
                raise DocumentIndexingError(f"Document: {document.name} is not completed.")
            if not document.enabled:
                continue
        elif action == "archive":
            if document.archived:
                continue
        elif not document.archived:  # un_archive
            continue

        pending.append((document_id, document, indexing_cache_key))

    if not pending:
        return

    # Pass 2: apply all changes, sharing a single timestamp for the batch.
    now = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
    index_jobs = []
    for document_id, document, indexing_cache_key in pending:
        if action == "enable":
            document.enabled = True
            document.disabled_at = None
            document.disabled_by = None
            task = add_document_to_index_task
        elif action == "disable":
            document.enabled = False
            document.disabled_at = now
            document.disabled_by = user.id
            task = remove_document_from_index_task
        elif action == "archive":
            document.archived = True
            document.archived_at = now
            document.archived_by = user.id
            # Only an enabled document is present in the index.
            task = remove_document_from_index_task if document.enabled else None
        else:  # un_archive
            document.archived = False
            document.archived_at = None
            document.archived_by = None
            # Only re-index if the document is currently enabled
            task = add_document_to_index_task if document.enabled else None

        document.updated_at = now
        if task is not None:
            index_jobs.append((indexing_cache_key, task, document_id))

    try:
        db.session.commit()
    except Exception:
        # Leave the session usable for the caller and queue nothing.
        db.session.rollback()
        raise

    # Pass 3: queue index work only once the new state is persisted.
    for indexing_cache_key, task, document_id in index_jobs:
        # Set cache to prevent indexing the same document multiple times
        redis_client.setex(indexing_cache_key, 600, 1)
        task.delay(document_id)
|
||||
|
||||
|
||||
class SegmentService:
|
||||
@classmethod
|
||||
|
@@ -1124,6 +1124,63 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/status/{action}'
|
||||
method='PATCH'
|
||||
title='Update Document Status'
|
||||
name='#batch_document_status'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
### Path
|
||||
<Properties>
|
||||
<Property name='dataset_id' type='string' key='dataset_id'>
|
||||
Knowledge ID
|
||||
</Property>
|
||||
<Property name='action' type='string' key='action'>
|
||||
- `enable` - Enable document
|
||||
- `disable` - Disable document
|
||||
- `archive` - Archive document
|
||||
- `un_archive` - Unarchive document
|
||||
</Property>
|
||||
</Properties>
|
||||
|
||||
### Request Body
|
||||
<Properties>
|
||||
<Property name='document_ids' type='array[string]' key='document_ids'>
|
||||
List of document IDs
|
||||
</Property>
|
||||
</Properties>
|
||||
</Col>
|
||||
<Col sticky>
|
||||
<CodeGroup
|
||||
title="Request"
|
||||
tag="PATCH"
|
||||
label="/datasets/{dataset_id}/documents/status/{action}"
|
||||
targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\n "document_ids": ["doc-id-1", "doc-id-2"]\n}'`}
|
||||
>
|
||||
```bash {{ title: 'cURL' }}
|
||||
curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \
|
||||
--header 'Authorization: Bearer {api_key}' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"document_ids": ["doc-id-1", "doc-id-2"]
|
||||
}'
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
<CodeGroup title="Response">
|
||||
```json {{ title: 'Response' }}
|
||||
{
|
||||
"result": "success"
|
||||
}
|
||||
```
|
||||
</CodeGroup>
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/{document_id}/segments'
|
||||
method='POST'
|
||||
|
@@ -881,6 +881,63 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/status/{action}'
|
||||
method='PATCH'
|
||||
title='ドキュメントステータスの更新'
|
||||
name='#batch_document_status'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
### パス
|
||||
<Properties>
|
||||
<Property name='dataset_id' type='string' key='dataset_id'>
|
||||
ナレッジ ID
|
||||
</Property>
|
||||
<Property name='action' type='string' key='action'>
|
||||
- `enable` - ドキュメントを有効化
|
||||
- `disable` - ドキュメントを無効化
|
||||
- `archive` - ドキュメントをアーカイブ
|
||||
- `un_archive` - ドキュメントのアーカイブを解除
|
||||
</Property>
|
||||
</Properties>
|
||||
|
||||
### リクエストボディ
|
||||
<Properties>
|
||||
<Property name='document_ids' type='array[string]' key='document_ids'>
|
||||
ドキュメントIDのリスト
|
||||
</Property>
|
||||
</Properties>
|
||||
</Col>
|
||||
<Col sticky>
|
||||
<CodeGroup
|
||||
title="リクエスト"
|
||||
tag="PATCH"
|
||||
label="/datasets/{dataset_id}/documents/status/{action}"
|
||||
targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\n "document_ids": ["doc-id-1", "doc-id-2"]\n}'`}
|
||||
>
|
||||
```bash {{ title: 'cURL' }}
|
||||
curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \
|
||||
--header 'Authorization: Bearer {api_key}' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"document_ids": ["doc-id-1", "doc-id-2"]
|
||||
}'
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
<CodeGroup title="レスポンス">
|
||||
```json {{ title: 'Response' }}
|
||||
{
|
||||
"result": "success"
|
||||
}
|
||||
```
|
||||
</CodeGroup>
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/{document_id}/segments'
|
||||
method='POST'
|
||||
@@ -2413,3 +2470,4 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
</tbody>
|
||||
</table>
|
||||
<div className="pb-4" />
|
||||
|
||||
|
@@ -1131,6 +1131,63 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/status/{action}'
|
||||
method='PATCH'
|
||||
title='更新文档状态'
|
||||
name='#batch_document_status'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
### Path
|
||||
<Properties>
|
||||
<Property name='dataset_id' type='string' key='dataset_id'>
|
||||
知识库 ID
|
||||
</Property>
|
||||
<Property name='action' type='string' key='action'>
|
||||
- `enable` - 启用文档
|
||||
- `disable` - 禁用文档
|
||||
- `archive` - 归档文档
|
||||
- `un_archive` - 取消归档文档
|
||||
</Property>
|
||||
</Properties>
|
||||
|
||||
### Request Body
|
||||
<Properties>
|
||||
<Property name='document_ids' type='array[string]' key='document_ids'>
|
||||
文档ID列表
|
||||
</Property>
|
||||
</Properties>
|
||||
</Col>
|
||||
<Col sticky>
|
||||
<CodeGroup
|
||||
title="Request"
|
||||
tag="PATCH"
|
||||
label="/datasets/{dataset_id}/documents/status/{action}"
|
||||
targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\n "document_ids": ["doc-id-1", "doc-id-2"]\n}'`}
|
||||
>
|
||||
```bash {{ title: 'cURL' }}
|
||||
curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \
|
||||
--header 'Authorization: Bearer {api_key}' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"document_ids": ["doc-id-1", "doc-id-2"]
|
||||
}'
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
<CodeGroup title="Response">
|
||||
```json {{ title: 'Response' }}
|
||||
{
|
||||
"result": "success"
|
||||
}
|
||||
```
|
||||
</CodeGroup>
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/{document_id}/segments'
|
||||
method='POST'
|
||||
|
Reference in New Issue
Block a user