feat: parent child retrieval (#12106)
Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import type { DataSourceNotionPage, DataSourceProvider } from './common'
|
||||
import type { AppIconType, AppMode, RetrievalConfig } from '@/types/app'
|
||||
import type { Tag } from '@/app/components/base/tag-management/constant'
|
||||
import type { IndexingType } from '@/app/components/datasets/create/step-two'
|
||||
|
||||
export enum DataSourceType {
|
||||
FILE = 'upload_file',
|
||||
@@ -10,6 +11,12 @@ export enum DataSourceType {
|
||||
|
||||
/** Permission values accepted by `DataSet.permission`. */
export type DatasetPermission =
  | 'only_me'
  | 'all_team_members'
  | 'partial_members'
|
||||
|
||||
/**
 * Chunking modes, used as the type of the `doc_form` field on datasets and
 * documents. Each member maps to the string value carried in that field.
 */
export enum ChunkingMode {
  'text' = 'text_model', // General text
  'qa' = 'qa_model', // General QA
  'parentChild' = 'hierarchical_model', // Parent-Child
}
|
||||
|
||||
export type DataSet = {
|
||||
id: string
|
||||
name: string
|
||||
@@ -18,11 +25,12 @@ export type DataSet = {
|
||||
description: string
|
||||
permission: DatasetPermission
|
||||
data_source_type: DataSourceType
|
||||
indexing_technique: 'high_quality' | 'economy'
|
||||
indexing_technique: IndexingType
|
||||
created_by: string
|
||||
updated_by: string
|
||||
updated_at: number
|
||||
app_count: number
|
||||
doc_form: ChunkingMode
|
||||
document_count: number
|
||||
word_count: number
|
||||
provider: string
|
||||
@@ -95,6 +103,12 @@ export type CustomFile = File & {
|
||||
created_at?: number
|
||||
}
|
||||
|
||||
/** Minimal document descriptor: identifier, display name and file extension. */
export type DocumentItem = {
  id: string
  name: string
  extension: string
}
|
||||
|
||||
export type CrawlOptions = {
|
||||
crawl_sub_pages: boolean
|
||||
only_main_content: boolean
|
||||
@@ -144,7 +158,7 @@ export type IndexingEstimateResponse = {
|
||||
total_price: number
|
||||
currency: string
|
||||
total_segments: number
|
||||
preview: string[]
|
||||
preview: Array<{ content: string; child_chunks: string[] }>
|
||||
qa_preview?: QA[]
|
||||
}
|
||||
|
||||
@@ -170,7 +184,12 @@ export type IndexingStatusBatchResponse = {
|
||||
data: IndexingStatusResponse[]
|
||||
}
|
||||
|
||||
/**
 * Processing modes, used as the type of the `mode` field on process rules.
 *
 * Declared exactly once, as a string enum: a type alias with the same name
 * cannot coexist with it (duplicate identifier).
 */
export enum ProcessMode {
  general = 'custom',
  parentChild = 'hierarchical',
}
|
||||
|
||||
/** Values accepted by `Rules.parent_mode`. */
export type ParentMode =
  | 'full-doc'
  | 'paragraph'
|
||||
|
||||
export type ProcessRuleResponse = {
|
||||
mode: ProcessMode
|
||||
@@ -181,6 +200,8 @@ export type ProcessRuleResponse = {
|
||||
/**
 * Rule set carried in the `rules` field of a process rule.
 *
 * `segmentation` and `subchunk_segmentation` share the `Segmentation` shape;
 * `parent_mode` is one of the `ParentMode` values.
 */
export type Rules = {
  pre_processing_rules: PreProcessingRule[]
  segmentation: Segmentation
  parent_mode: ParentMode
  subchunk_segmentation: Segmentation
}
|
||||
|
||||
export type Limits = {
|
||||
@@ -195,7 +216,7 @@ export type PreProcessingRule = {
|
||||
/**
 * Segmentation settings: a separator string and a token limit, plus an
 * optional overlap.
 *
 * `chunk_overlap` is declared once, as optional, so objects that omit it are
 * still valid.
 */
export type Segmentation = {
  separator: string
  max_tokens: number
  chunk_overlap?: number
}
|
||||
|
||||
export const DocumentIndexingStatusList = [
|
||||
@@ -258,13 +279,14 @@ export type InitialDocumentDetail = {
|
||||
display_status: DocumentDisplayStatus
|
||||
completed_segments?: number
|
||||
total_segments?: number
|
||||
doc_form: 'text_model' | 'qa_model'
|
||||
doc_form: ChunkingMode
|
||||
doc_language: string
|
||||
}
|
||||
|
||||
export type SimpleDocumentDetail = InitialDocumentDetail & {
|
||||
enabled: boolean
|
||||
word_count: number
|
||||
is_qa: boolean // TODO waiting for backend to add this field
|
||||
error?: string | null
|
||||
archived: boolean
|
||||
updated_at: number
|
||||
@@ -289,7 +311,7 @@ export type DocumentListResponse = {
|
||||
/**
 * Fields of a document request.
 *
 * `doc_form` is declared once and typed as `ChunkingMode`, matching the
 * `doc_form` fields on `DataSet` and `InitialDocumentDetail`.
 */
export type DocumentReq = {
  original_document_id?: string
  indexing_technique?: string
  doc_form: ChunkingMode
  doc_language: string
  process_rule: ProcessRule
}
|
||||
@@ -331,7 +353,7 @@ export type NotionPage = {
|
||||
}
|
||||
|
||||
/**
 * A process rule: a processing mode together with its rule set.
 *
 * `mode` is declared once and typed as `ProcessMode`, the same type used by
 * `ProcessRuleResponse.mode`.
 */
export type ProcessRule = {
  mode: ProcessMode
  rules: Rules
}
|
||||
|
||||
@@ -341,6 +363,11 @@ export type createDocumentResponse = {
|
||||
documents: InitialDocumentDetail[]
|
||||
}
|
||||
|
||||
/**
 * Process rule shape (`mode` + `rules`) used by
 * `FullDocumentDetail.dataset_process_rule`.
 *
 * NOTE(review): the name looks like a misspelling of `ProcessRule`, which has
 * the same fields. It is kept unchanged here because it is exported and
 * referenced by name; confirm before renaming or merging the two.
 */
export type PrecessRule = {
  mode: ProcessMode
  rules: Rules
}
|
||||
|
||||
export type FullDocumentDetail = SimpleDocumentDetail & {
|
||||
batch: string
|
||||
created_api_request_id: string
|
||||
@@ -363,6 +390,8 @@ export type FullDocumentDetail = SimpleDocumentDetail & {
|
||||
doc_type?: DocType | null | 'others'
|
||||
doc_metadata?: DocMetadata | null
|
||||
segment_count: number
|
||||
dataset_process_rule: PrecessRule
|
||||
document_process_rule: ProcessRule
|
||||
[key: string]: any
|
||||
}
|
||||
|
||||
@@ -399,12 +428,12 @@ export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing']
|
||||
/** Element type of the `SEGMENT_STATUS_LIST` array. */
export type SegmentStatus = (typeof SEGMENT_STATUS_LIST)[number]
|
||||
|
||||
/**
 * Query parameters for listing segments.
 *
 * `enabled` is declared once, with the wider type `boolean | 'all'`, so both
 * plain booleans and the literal `'all'` are accepted.
 */
export type SegmentsQuery = {
  // NOTE(review): the diff's hunk line counts suggest `last_id` was dropped in
  // favour of `page`; it is kept optional here so existing callers still
  // type-check — confirm whether it can be removed.
  last_id?: string
  page?: string
  limit: number
  // status?: SegmentStatus
  hit_count_gte?: number
  keyword?: string
  enabled?: boolean | 'all'
}
|
||||
|
||||
export type SegmentDetailModel = {
|
||||
@@ -429,6 +458,8 @@ export type SegmentDetailModel = {
|
||||
error: string | null
|
||||
stopped_at: number
|
||||
answer?: string
|
||||
child_chunks?: ChildChunkDetail[]
|
||||
updated_at: number
|
||||
}
|
||||
|
||||
export type SegmentsResponse = {
|
||||
@@ -436,6 +467,8 @@ export type SegmentsResponse = {
|
||||
has_more: boolean
|
||||
limit: number
|
||||
total: number
|
||||
total_pages: number
|
||||
page: number
|
||||
}
|
||||
|
||||
export type HitTestingRecord = {
|
||||
@@ -448,10 +481,18 @@ export type HitTestingRecord = {
|
||||
created_at: number
|
||||
}
|
||||
|
||||
/**
 * Child chunk entry attached to a hit-testing result
 * (see `HitTesting.child_chunks`).
 */
export type HitTestingChildChunk = {
  id: string
  content: string
  position: number
  score: number
}
|
||||
/**
 * A single hit-testing result entry.
 *
 * Both `segment` and `content` are typed as `Segment`. `child_chunks` may be
 * omitted or explicitly `null`.
 */
export type HitTesting = {
  segment: Segment
  content: Segment
  score: number
  tsne_position: TsnePosition
  child_chunks?: HitTestingChildChunk[] | null
}
|
||||
|
||||
export type ExternalKnowledgeBaseHitTesting = {
|
||||
@@ -530,11 +571,7 @@ export type SegmentUpdater = {
|
||||
content: string
|
||||
answer?: string
|
||||
keywords?: string[]
|
||||
}
|
||||
|
||||
export enum DocForm {
|
||||
TEXT = 'text_model',
|
||||
QA = 'qa_model',
|
||||
regenerate_child_chunks?: boolean
|
||||
}
|
||||
|
||||
export type ErrorDocsResponse = {
|
||||
@@ -579,3 +616,49 @@ export const DEFAULT_WEIGHTED_SCORE = {
|
||||
keyword: 0.3,
|
||||
},
|
||||
}
|
||||
|
||||
/** Values accepted by `ChildChunkDetail.type`. */
export type ChildChunkType =
  | 'automatic'
  | 'customized'
|
||||
|
||||
/**
 * Detail record for one child chunk. `segment_id` holds the id of a segment;
 * `type` is one of the `ChildChunkType` values.
 */
export type ChildChunkDetail = {
  id: string
  position: number
  segment_id: string
  content: string
  word_count: number
  created_at: number
  updated_at: number
  type: ChildChunkType
}
|
||||
|
||||
/** Paginated list of child chunks: the items plus total/page/limit counters. */
export type ChildSegmentsResponse = {
  data: ChildChunkDetail[]
  total: number
  total_pages: number
  page: number
  limit: number
}
|
||||
|
||||
/** Identifiers addressing one document within one dataset. */
export type UpdateDocumentParams = {
  datasetId: string
  documentId: string
}
|
||||
|
||||
/**
 * Document actions. Used in api url, so each member's string value must stay
 * exactly as written (note `unArchive` maps to `'un_archive'`).
 */
export enum DocumentActionType {
  enable = 'enable',
  disable = 'disable',
  archive = 'archive',
  unArchive = 'un_archive',
  delete = 'delete',
}
|
||||
|
||||
/**
 * Parameters for a document update that may target several documents.
 *
 * Only `datasetId` is required. `documentIds` accepts either an array of ids
 * or a single string.
 */
export type UpdateDocumentBatchParams = {
  datasetId: string
  documentId?: string
  documentIds?: string[] | string
}
|
||||
|
||||
/** Response of a batch import: the job's id and its status string. */
export type BatchImportResponse = {
  job_id: string
  job_status: string
}
|
||||
|
Reference in New Issue
Block a user