Add get document detail service api (#21700)
Co-authored-by: lizb <lizb@sugon.com>
This commit is contained in:
@@ -3,7 +3,7 @@ import json
|
||||
from flask import request
|
||||
from flask_restful import marshal, reqparse
|
||||
from sqlalchemy import desc, select
|
||||
from werkzeug.exceptions import NotFound
|
||||
from werkzeug.exceptions import Forbidden, NotFound
|
||||
|
||||
import services
|
||||
from controllers.common.errors import FilenameNotExistsError
|
||||
@@ -18,6 +18,7 @@ from controllers.service_api.app.error import (
|
||||
from controllers.service_api.dataset.error import (
|
||||
ArchivedDocumentImmutableError,
|
||||
DocumentIndexingError,
|
||||
InvalidMetadataError,
|
||||
)
|
||||
from controllers.service_api.wraps import (
|
||||
DatasetApiResource,
|
||||
@@ -466,6 +467,101 @@ class DocumentIndexingStatusApi(DatasetApiResource):
|
||||
return data
|
||||
|
||||
|
||||
class DocumentDetailApi(DatasetApiResource):
    """Service API resource that returns the detail of a single document."""

    # Accepted values for the ``metadata`` query parameter.
    METADATA_CHOICES = {"all", "only", "without"}

    def get(self, tenant_id, dataset_id, document_id):
        """Return the detail of one document in a dataset.

        The ``metadata`` query parameter (default ``"all"``) selects the shape
        of the response:

        * ``"only"``    - just ``id``, ``doc_type`` and ``doc_metadata``.
        * ``"without"`` - the full detail, minus ``doc_type``/``doc_metadata``.
        * ``"all"``     - the full detail including ``doc_type``/``doc_metadata``.

        Raises:
            NotFound: the dataset (via ``get_dataset``) or the document does
                not exist.
            Forbidden: the document belongs to a different tenant.
            InvalidMetadataError: ``metadata`` is not one of
                ``METADATA_CHOICES``.
        """
        dataset_id = str(dataset_id)
        document_id = str(document_id)

        dataset = self.get_dataset(dataset_id, tenant_id)

        document = DocumentService.get_document(dataset.id, document_id)

        if not document:
            raise NotFound("Document not found.")

        if document.tenant_id != str(tenant_id):
            raise Forbidden("No permission.")

        metadata = request.args.get("metadata", "all")
        if metadata not in self.METADATA_CHOICES:
            raise InvalidMetadataError(f"Invalid metadata value: {metadata}")

        if metadata == "only":
            return {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata_details}

        return self._build_detail(document, dataset_id, include_metadata=(metadata == "all"))

    @staticmethod
    def _build_detail(document, dataset_id, include_metadata):
        """Build the full document-detail dict, optionally with doc_type/doc_metadata."""
        dataset_process_rules = DatasetService.get_process_rules(dataset_id)

        # Read the attribute once and guard against a missing rule so the
        # endpoint does not fail with AttributeError on ``None.to_dict()``.
        # NOTE(review): presumably the rule is None for documents without a
        # process rule attached - confirm against the Document model.
        process_rule = document.dataset_process_rule
        document_process_rules = process_rule.to_dict() if process_rule else {}

        data_source_info = document.data_source_detail_dict

        detail = {
            "id": document.id,
            "position": document.position,
            "data_source_type": document.data_source_type,
            "data_source_info": data_source_info,
            "dataset_process_rule_id": document.dataset_process_rule_id,
            "dataset_process_rule": dataset_process_rules,
            "document_process_rule": document_process_rules,
            "name": document.name,
            "created_from": document.created_from,
            "created_by": document.created_by,
            # NOTE(review): unlike the other timestamps this one is not
            # truncated to int; kept as-is to preserve the existing response.
            "created_at": document.created_at.timestamp(),
            "tokens": document.tokens,
            "indexing_status": document.indexing_status,
            "completed_at": int(document.completed_at.timestamp()) if document.completed_at else None,
            "updated_at": int(document.updated_at.timestamp()) if document.updated_at else None,
            "indexing_latency": document.indexing_latency,
            "error": document.error,
            "enabled": document.enabled,
            "disabled_at": int(document.disabled_at.timestamp()) if document.disabled_at else None,
            "disabled_by": document.disabled_by,
            "archived": document.archived,
        }

        # Inserted here so the key order matches the original "all" response.
        if include_metadata:
            detail["doc_type"] = document.doc_type
            detail["doc_metadata"] = document.doc_metadata_details

        detail.update(
            {
                "segment_count": document.segment_count,
                "average_segment_length": document.average_segment_length,
                "hit_count": document.hit_count,
                "display_status": document.display_status,
                "doc_form": document.doc_form,
                "doc_language": document.doc_language,
            }
        )
        return detail
|
||||
|
||||
|
||||
api.add_resource(
|
||||
DocumentAddByTextApi,
|
||||
"/datasets/<uuid:dataset_id>/document/create_by_text",
|
||||
@@ -489,3 +585,4 @@ api.add_resource(
|
||||
api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents")
|
||||
api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status")
|
||||
api.add_resource(DocumentDetailApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
|
@@ -11,13 +11,13 @@ from flask_restful import Resource
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import select, update
|
||||
from sqlalchemy.orm import Session
|
||||
from werkzeug.exceptions import Forbidden, Unauthorized
|
||||
from werkzeug.exceptions import Forbidden, NotFound, Unauthorized
|
||||
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from libs.login import _get_user
|
||||
from models.account import Account, Tenant, TenantAccountJoin, TenantStatus
|
||||
from models.dataset import RateLimitLog
|
||||
from models.dataset import Dataset, RateLimitLog
|
||||
from models.model import ApiToken, App, EndUser
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
@@ -317,3 +317,11 @@ def create_or_update_end_user_for_user_id(app_model: App, user_id: Optional[str]
|
||||
|
||||
class DatasetApiResource(Resource):
    """Base resource for dataset service-API endpoints.

    ``validate_dataset_token`` is registered through ``method_decorators``.
    """

    method_decorators = [validate_dataset_token]

    def get_dataset(self, dataset_id: str, tenant_id: str) -> Dataset:
        """Fetch the dataset with the given id that belongs to the given tenant.

        Raises:
            NotFound: no dataset matches both ``dataset_id`` and ``tenant_id``.
        """
        matching = db.session.query(Dataset).filter(
            Dataset.id == dataset_id,
            Dataset.tenant_id == tenant_id,
        )
        found = matching.first()

        if found:
            return found

        raise NotFound("Dataset not found.")
|
||||
|
@@ -1124,6 +1124,129 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/{document_id}'
|
||||
method='GET'
|
||||
title='Get Document Detail'
|
||||
name='#get-document-detail'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
Get the details of a single document.
|
||||
### Path
|
||||
- `dataset_id` (string) Dataset ID
|
||||
- `document_id` (string) Document ID
|
||||
|
||||
### Query
|
||||
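- `metadata` (string) Metadata filter; one of `all`, `only`, or `without`. `only` returns just `id`, `doc_type` and `doc_metadata`; `without` returns the document detail without `doc_type` and `doc_metadata`; `all` returns the document detail including both. Default is `all`.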
|
||||
|
||||
### Response
|
||||
Returns the document's detail.
|
||||
</Col>
|
||||
<Col sticky>
|
||||
### Request Example
|
||||
<CodeGroup title="Request" tag="GET" label="/datasets/{dataset_id}/documents/{document_id}" targetCode={`curl -X GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n-H 'Authorization: Bearer {api_key}'`}>
|
||||
```bash {{ title: 'cURL' }}
|
||||
curl -X GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
|
||||
-H 'Authorization: Bearer {api_key}'
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
### Response Example
|
||||
<CodeGroup title="Response">
|
||||
```json {{ title: 'Response' }}
|
||||
{
|
||||
"id": "f46ae30c-5c11-471b-96d0-464f5f32a7b2",
|
||||
"position": 1,
|
||||
"data_source_type": "upload_file",
|
||||
"data_source_info": {
|
||||
"upload_file": {
|
||||
...
|
||||
}
|
||||
},
|
||||
"dataset_process_rule_id": "24b99906-845e-499f-9e3c-d5565dd6962c",
|
||||
"dataset_process_rule": {
|
||||
"mode": "hierarchical",
|
||||
"rules": {
|
||||
"pre_processing_rules": [
|
||||
{
|
||||
"id": "remove_extra_spaces",
|
||||
"enabled": true
|
||||
},
|
||||
{
|
||||
"id": "remove_urls_emails",
|
||||
"enabled": false
|
||||
}
|
||||
],
|
||||
"segmentation": {
|
||||
"separator": "**********page_ending**********",
|
||||
"max_tokens": 1024,
|
||||
"chunk_overlap": 0
|
||||
},
|
||||
"parent_mode": "paragraph",
|
||||
"subchunk_segmentation": {
|
||||
"separator": "\n",
|
||||
"max_tokens": 512,
|
||||
"chunk_overlap": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"document_process_rule": {
|
||||
"id": "24b99906-845e-499f-9e3c-d5565dd6962c",
|
||||
"dataset_id": "48a0db76-d1a9-46c1-ae35-2baaa919a8a9",
|
||||
"mode": "hierarchical",
|
||||
"rules": {
|
||||
"pre_processing_rules": [
|
||||
{
|
||||
"id": "remove_extra_spaces",
|
||||
"enabled": true
|
||||
},
|
||||
{
|
||||
"id": "remove_urls_emails",
|
||||
"enabled": false
|
||||
}
|
||||
],
|
||||
"segmentation": {
|
||||
"separator": "**********page_ending**********",
|
||||
"max_tokens": 1024,
|
||||
"chunk_overlap": 0
|
||||
},
|
||||
"parent_mode": "paragraph",
|
||||
"subchunk_segmentation": {
|
||||
"separator": "\n",
|
||||
"max_tokens": 512,
|
||||
"chunk_overlap": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "xxxx",
|
||||
"created_from": "web",
|
||||
"created_by": "17f71940-a7b5-4c77-b60f-2bd645c1ffa0",
|
||||
"created_at": 1750464191,
|
||||
"tokens": null,
|
||||
"indexing_status": "waiting",
|
||||
"completed_at": null,
|
||||
"updated_at": 1750464191,
|
||||
"indexing_latency": null,
|
||||
"error": null,
|
||||
"enabled": true,
|
||||
"disabled_at": null,
|
||||
"disabled_by": null,
|
||||
"archived": false,
|
||||
"segment_count": 0,
|
||||
"average_segment_length": 0,
|
||||
"hit_count": null,
|
||||
"display_status": "queuing",
|
||||
"doc_form": "hierarchical_model",
|
||||
"doc_language": "Chinese Simplified"
|
||||
}
|
||||
```
|
||||
</CodeGroup>
|
||||
</Col>
|
||||
</Row>
|
||||
___
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/status/{action}'
|
||||
method='PATCH'
|
||||
|
@@ -881,6 +881,130 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/{document_id}'
|
||||
method='GET'
|
||||
title='ドキュメントの詳細を取得'
|
||||
name='#get-document-detail'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
ドキュメントの詳細を取得します。
|
||||
### Path
|
||||
- `dataset_id` (string) ナレッジベースID
|
||||
- `document_id` (string) ドキュメントID
|
||||
|
||||
### Query
|
||||
- `metadata` (string) metadataのフィルター条件 `all`、`only`、または`without`。デフォルトは `all`。
|
||||
|
||||
### Response
|
||||
ナレッジベースドキュメントの詳細を返します。
|
||||
</Col>
|
||||
<Col sticky>
|
||||
### Request Example
|
||||
<CodeGroup title="Request" tag="GET" label="/datasets/{dataset_id}/documents/{document_id}" targetCode={`curl -X GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n-H 'Authorization: Bearer {api_key}'`}>
|
||||
```bash {{ title: 'cURL' }}
|
||||
curl -X GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
|
||||
-H 'Authorization: Bearer {api_key}'
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
### Response Example
|
||||
<CodeGroup title="Response">
|
||||
```json {{ title: 'Response' }}
|
||||
{
|
||||
"id": "f46ae30c-5c11-471b-96d0-464f5f32a7b2",
|
||||
"position": 1,
|
||||
"data_source_type": "upload_file",
|
||||
"data_source_info": {
|
||||
"upload_file": {
|
||||
...
|
||||
}
|
||||
},
|
||||
"dataset_process_rule_id": "24b99906-845e-499f-9e3c-d5565dd6962c",
|
||||
"dataset_process_rule": {
|
||||
"mode": "hierarchical",
|
||||
"rules": {
|
||||
"pre_processing_rules": [
|
||||
{
|
||||
"id": "remove_extra_spaces",
|
||||
"enabled": true
|
||||
},
|
||||
{
|
||||
"id": "remove_urls_emails",
|
||||
"enabled": false
|
||||
}
|
||||
],
|
||||
"segmentation": {
|
||||
"separator": "**********page_ending**********",
|
||||
"max_tokens": 1024,
|
||||
"chunk_overlap": 0
|
||||
},
|
||||
"parent_mode": "paragraph",
|
||||
"subchunk_segmentation": {
|
||||
"separator": "\n",
|
||||
"max_tokens": 512,
|
||||
"chunk_overlap": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"document_process_rule": {
|
||||
"id": "24b99906-845e-499f-9e3c-d5565dd6962c",
|
||||
"dataset_id": "48a0db76-d1a9-46c1-ae35-2baaa919a8a9",
|
||||
"mode": "hierarchical",
|
||||
"rules": {
|
||||
"pre_processing_rules": [
|
||||
{
|
||||
"id": "remove_extra_spaces",
|
||||
"enabled": true
|
||||
},
|
||||
{
|
||||
"id": "remove_urls_emails",
|
||||
"enabled": false
|
||||
}
|
||||
],
|
||||
"segmentation": {
|
||||
"separator": "**********page_ending**********",
|
||||
"max_tokens": 1024,
|
||||
"chunk_overlap": 0
|
||||
},
|
||||
"parent_mode": "paragraph",
|
||||
"subchunk_segmentation": {
|
||||
"separator": "\n",
|
||||
"max_tokens": 512,
|
||||
"chunk_overlap": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "xxxx",
|
||||
"created_from": "web",
|
||||
"created_by": "17f71940-a7b5-4c77-b60f-2bd645c1ffa0",
|
||||
"created_at": 1750464191,
|
||||
"tokens": null,
|
||||
"indexing_status": "waiting",
|
||||
"completed_at": null,
|
||||
"updated_at": 1750464191,
|
||||
"indexing_latency": null,
|
||||
"error": null,
|
||||
"enabled": true,
|
||||
"disabled_at": null,
|
||||
"disabled_by": null,
|
||||
"archived": false,
|
||||
"segment_count": 0,
|
||||
"average_segment_length": 0,
|
||||
"hit_count": null,
|
||||
"display_status": "queuing",
|
||||
"doc_form": "hierarchical_model",
|
||||
"doc_language": "Chinese Simplified"
|
||||
}
|
||||
```
|
||||
</CodeGroup>
|
||||
</Col>
|
||||
</Row>
|
||||
___
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/status/{action}'
|
||||
method='PATCH'
|
||||
|
@@ -1131,6 +1131,130 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/{document_id}'
|
||||
method='GET'
|
||||
title='获取文档详情'
|
||||
name='#get-document-detail'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
获取文档详情。
|
||||
### Path
|
||||
- `dataset_id` (string) 知识库 ID
|
||||
- `document_id` (string) 文档 ID
|
||||
|
||||
### Query
|
||||
- `metadata` (string) metadata 过滤条件，可选值为 `all`、`only` 或 `without`，默认为 `all`。
|
||||
|
||||
### Response
|
||||
返回知识库文档的详情。
|
||||
</Col>
|
||||
<Col sticky>
|
||||
### Request Example
|
||||
<CodeGroup title="Request" tag="GET" label="/datasets/{dataset_id}/documents/{document_id}" targetCode={`curl -X GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n-H 'Authorization: Bearer {api_key}'`}>
|
||||
```bash {{ title: 'cURL' }}
|
||||
curl -X GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
|
||||
-H 'Authorization: Bearer {api_key}'
|
||||
```
|
||||
</CodeGroup>
|
||||
|
||||
### Response Example
|
||||
<CodeGroup title="Response">
|
||||
```json {{ title: 'Response' }}
|
||||
{
|
||||
"id": "f46ae30c-5c11-471b-96d0-464f5f32a7b2",
|
||||
"position": 1,
|
||||
"data_source_type": "upload_file",
|
||||
"data_source_info": {
|
||||
"upload_file": {
|
||||
...
|
||||
}
|
||||
},
|
||||
"dataset_process_rule_id": "24b99906-845e-499f-9e3c-d5565dd6962c",
|
||||
"dataset_process_rule": {
|
||||
"mode": "hierarchical",
|
||||
"rules": {
|
||||
"pre_processing_rules": [
|
||||
{
|
||||
"id": "remove_extra_spaces",
|
||||
"enabled": true
|
||||
},
|
||||
{
|
||||
"id": "remove_urls_emails",
|
||||
"enabled": false
|
||||
}
|
||||
],
|
||||
"segmentation": {
|
||||
"separator": "**********page_ending**********",
|
||||
"max_tokens": 1024,
|
||||
"chunk_overlap": 0
|
||||
},
|
||||
"parent_mode": "paragraph",
|
||||
"subchunk_segmentation": {
|
||||
"separator": "\n",
|
||||
"max_tokens": 512,
|
||||
"chunk_overlap": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"document_process_rule": {
|
||||
"id": "24b99906-845e-499f-9e3c-d5565dd6962c",
|
||||
"dataset_id": "48a0db76-d1a9-46c1-ae35-2baaa919a8a9",
|
||||
"mode": "hierarchical",
|
||||
"rules": {
|
||||
"pre_processing_rules": [
|
||||
{
|
||||
"id": "remove_extra_spaces",
|
||||
"enabled": true
|
||||
},
|
||||
{
|
||||
"id": "remove_urls_emails",
|
||||
"enabled": false
|
||||
}
|
||||
],
|
||||
"segmentation": {
|
||||
"separator": "**********page_ending**********",
|
||||
"max_tokens": 1024,
|
||||
"chunk_overlap": 0
|
||||
},
|
||||
"parent_mode": "paragraph",
|
||||
"subchunk_segmentation": {
|
||||
"separator": "\n",
|
||||
"max_tokens": 512,
|
||||
"chunk_overlap": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "xxxx",
|
||||
"created_from": "web",
|
||||
"created_by": "17f71940-a7b5-4c77-b60f-2bd645c1ffa0",
|
||||
"created_at": 1750464191,
|
||||
"tokens": null,
|
||||
"indexing_status": "waiting",
|
||||
"completed_at": null,
|
||||
"updated_at": 1750464191,
|
||||
"indexing_latency": null,
|
||||
"error": null,
|
||||
"enabled": true,
|
||||
"disabled_at": null,
|
||||
"disabled_by": null,
|
||||
"archived": false,
|
||||
"segment_count": 0,
|
||||
"average_segment_length": 0,
|
||||
"hit_count": null,
|
||||
"display_status": "queuing",
|
||||
"doc_form": "hierarchical_model",
|
||||
"doc_language": "Chinese Simplified"
|
||||
}
|
||||
```
|
||||
</CodeGroup>
|
||||
</Col>
|
||||
</Row>
|
||||
___
|
||||
<hr className='ml-0 mr-0' />
|
||||
|
||||
|
||||
<Heading
|
||||
url='/datasets/{dataset_id}/documents/status/{action}'
|
||||
method='PATCH'
|
||||
|
Reference in New Issue
Block a user