Revert "Feat/parent child retrieval" (#12095)

This commit is contained in:
-LAN-
2024-12-25 20:55:44 +08:00
committed by GitHub
parent 9231fdbf4c
commit db2aa83a7c
216 changed files with 3116 additions and 9066 deletions

View File

@@ -1,7 +1,6 @@
import type { DataSourceNotionPage, DataSourceProvider } from './common'
import type { AppIconType, AppMode, RetrievalConfig } from '@/types/app'
import type { Tag } from '@/app/components/base/tag-management/constant'
import type { IndexingType } from '@/app/components/datasets/create/step-two'
export enum DataSourceType {
FILE = 'upload_file',
@@ -11,12 +10,6 @@ export enum DataSourceType {
/** Permission values assignable to a dataset's `permission` field. */
export type DatasetPermission = 'only_me' | 'all_team_members' | 'partial_members'
/**
 * Chunking modes. Each member's value is the `*_model` identifier string
 * that the member stands for.
 */
export enum ChunkingMode {
  /** General text */
  text = 'text_model',
  /** General QA */
  qa = 'qa_model',
  /** Parent-Child */
  parentChild = 'hierarchical_model',
}
export type DataSet = {
id: string
name: string
@@ -25,12 +18,11 @@ export type DataSet = {
description: string
permission: DatasetPermission
data_source_type: DataSourceType
indexing_technique: IndexingType
indexing_technique: 'high_quality' | 'economy'
created_by: string
updated_by: string
updated_at: number
app_count: number
doc_form: ChunkingMode
document_count: number
word_count: number
provider: string
@@ -103,12 +95,6 @@ export type CustomFile = File & {
created_at?: number
}
/** Describes a document by its `id`, `name` and `extension` strings. */
export type DocumentItem = {
id: string
name: string
extension: string
}
export type CrawlOptions = {
crawl_sub_pages: boolean
only_main_content: boolean
@@ -158,7 +144,7 @@ export type IndexingEstimateResponse = {
total_price: number
currency: string
total_segments: number
preview: Array<{ content: string; child_chunks: string[] }>
preview: string[]
qa_preview?: QA[]
}
@@ -184,12 +170,7 @@ export type IndexingStatusBatchResponse = {
data: IndexingStatusResponse[]
}
/**
 * Processing modes as an enum: `general` carries the string 'custom' and
 * `parentChild` carries the string 'hierarchical'.
 *
 * NOTE(review): a type alias that is also named `ProcessMode` is declared two
 * statements below. TypeScript reports a duplicate identifier when an enum and
 * a type alias share a name in one scope, so only one of the two declarations
 * can remain — confirm which one is intended before compiling this file.
 */
export enum ProcessMode {
general = 'custom',
parentChild = 'hierarchical',
}
/** Allowed values for the `parent_mode` field of `Rules`. */
export type ParentMode = 'full-doc' | 'paragraph'
/** Processing modes as a string-literal union. */
export type ProcessMode = 'automatic' | 'custom'
export type ProcessRuleResponse = {
mode: ProcessMode
@@ -200,8 +181,6 @@ export type ProcessRuleResponse = {
/** Rule set used as the `rules` field of `ProcessRule`. */
export type Rules = {
// List of pre-processing rules.
pre_processing_rules: PreProcessingRule[]
// Segmentation settings (see `Segmentation`).
segmentation: Segmentation
// One of the `ParentMode` literals.
parent_mode: ParentMode
// NOTE(review): presumably the segmentation applied to sub-chunks; it has the
// same shape as `segmentation` — confirm against the backend contract.
subchunk_segmentation: Segmentation
}
export type Limits = {
@@ -216,7 +195,7 @@ export type PreProcessingRule = {
/**
 * Segmentation settings: a separator string, a token limit and a chunk overlap.
 *
 * `chunk_overlap` used to be declared twice in this literal (once optional,
 * once required), which TypeScript rejects as a duplicate identifier. Only the
 * later, required declaration is kept.
 */
export type Segmentation = {
  /** Separator string. */
  separator: string
  /** Token limit — presumably per segment; confirm against the backend contract. */
  max_tokens: number
  /** Chunk overlap; required. */
  chunk_overlap: number
}
export const DocumentIndexingStatusList = [
@@ -279,14 +258,13 @@ export type InitialDocumentDetail = {
display_status: DocumentDisplayStatus
completed_segments?: number
total_segments?: number
doc_form: ChunkingMode
doc_form: 'text_model' | 'qa_model'
doc_language: string
}
export type SimpleDocumentDetail = InitialDocumentDetail & {
enabled: boolean
word_count: number
is_qa: boolean // TODO waiting for backend to add this field
error?: string | null
archived: boolean
updated_at: number
@@ -311,7 +289,7 @@ export type DocumentListResponse = {
/**
 * Fields describing a document request (presumably the create/update payload —
 * confirm against the API client that consumes this type).
 *
 * `doc_form` used to be declared twice in this literal with two different
 * types, which TypeScript rejects as a duplicate identifier. Only the later
 * declaration (the string-literal union) is kept.
 */
export type DocumentReq = {
  original_document_id?: string
  indexing_technique?: string
  /** Document form identifier. */
  doc_form: 'text_model' | 'qa_model'
  doc_language: string
  /** Processing rule attached to the request. */
  process_rule: ProcessRule
}
@@ -353,7 +331,7 @@ export type NotionPage = {
}
/**
 * A processing rule: a mode string plus the rule set it applies.
 *
 * `mode` used to be declared twice in this literal with two different types,
 * which TypeScript rejects as a duplicate identifier. Only the later
 * declaration (`string`) is kept.
 */
export type ProcessRule = {
  /** Processing mode, as a plain string. */
  mode: string
  /** Rule set for this mode. */
  rules: Rules
}
@@ -363,11 +341,6 @@ export type createDocumentResponse = {
documents: InitialDocumentDetail[]
}
/**
 * A processing mode paired with its rule set.
 *
 * NOTE(review): the name looks like a misspelling of `ProcessRule`, and the
 * field names match that type. Renaming would change the exported interface,
 * so it is left as-is here — confirm whether the two types should be merged.
 */
export type PrecessRule = {
mode: ProcessMode
rules: Rules
}
export type FullDocumentDetail = SimpleDocumentDetail & {
batch: string
created_api_request_id: string
@@ -390,8 +363,6 @@ export type FullDocumentDetail = SimpleDocumentDetail & {
doc_type?: DocType | null | 'others'
doc_metadata?: DocMetadata | null
segment_count: number
dataset_process_rule: PrecessRule
document_process_rule: ProcessRule
[key: string]: any
}
@@ -428,12 +399,12 @@ export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing']
/** Union of the element types of `SEGMENT_STATUS_LIST`. */
export type SegmentStatus = typeof SEGMENT_STATUS_LIST[number]
/**
 * Query parameters for listing segments. Only `limit` is required.
 *
 * `enabled` used to be declared twice in this literal with two different
 * types, which TypeScript rejects as a duplicate identifier. Only the later
 * declaration (`boolean`) is kept.
 */
export type SegmentsQuery = {
  page?: string
  last_id?: string
  limit: number
  // status?: SegmentStatus
  hit_count_gte?: number
  keyword?: string
  enabled?: boolean
}
export type SegmentDetailModel = {
@@ -458,8 +429,6 @@ export type SegmentDetailModel = {
error: string | null
stopped_at: number
answer?: string
child_chunks?: ChildChunkDetail[]
updated_at: number
}
export type SegmentsResponse = {
@@ -467,8 +436,6 @@ export type SegmentsResponse = {
has_more: boolean
limit: number
total: number
total_pages: number
page: number
}
export type HitTestingRecord = {
@@ -481,18 +448,10 @@ export type HitTestingRecord = {
created_at: number
}
/** Element type of the `child_chunks` array on `HitTesting`. */
export type HitTestingChildChunk = {
id: string
content: string
position: number
score: number
}
/**
 * A hit-testing entry: segment data, a score, a t-SNE position and optional
 * child chunks.
 *
 * NOTE(review): `segment` and `content` are both typed `Segment` — confirm
 * that both fields are intended to exist on the same object.
 */
export type HitTesting = {
segment: Segment
content: Segment
score: number
tsne_position: TsnePosition
// May be omitted or null.
child_chunks?: HitTestingChildChunk[] | null
}
export type ExternalKnowledgeBaseHitTesting = {
@@ -571,7 +530,11 @@ export type SegmentUpdater = {
content: string
answer?: string
keywords?: string[]
regenerate_child_chunks?: boolean
}
/**
 * Document form identifiers: `TEXT` carries 'text_model' and `QA` carries
 * 'qa_model' — the same two strings used by the literal-union `doc_form` fields.
 */
export enum DocForm {
TEXT = 'text_model',
QA = 'qa_model',
}
export type ErrorDocsResponse = {
@@ -616,49 +579,3 @@ export const DEFAULT_WEIGHTED_SCORE = {
keyword: 0.3,
},
}
/** Allowed values for the `type` field of `ChildChunkDetail`. */
export type ChildChunkType = 'automatic' | 'customized'
/** A single child chunk record. */
export type ChildChunkDetail = {
id: string
position: number
// Presumably the id of the segment this child chunk belongs to — confirm.
segment_id: string
content: string
word_count: number
// Numeric timestamps; the unit is not visible here — confirm with the backend.
created_at: number
updated_at: number
type: ChildChunkType
}
/** A list of child chunks together with numeric paging fields. */
export type ChildSegmentsResponse = {
data: ChildChunkDetail[]
total: number
total_pages: number
page: number
limit: number
}
/** Identifies one document by dataset id and document id. */
export type UpdateDocumentParams = {
datasetId: string
documentId: string
}
/** Document actions. The string values are used in the API URL. */
export enum DocumentActionType {
enable = 'enable',
disable = 'disable',
archive = 'archive',
unArchive = 'un_archive',
delete = 'delete',
}
/**
 * Parameters for a document update that may target several documents.
 * `datasetId` is required; the document(s) are given either as `documentId`
 * or as `documentIds`.
 */
export type UpdateDocumentBatchParams = {
datasetId: string
documentId?: string
// Accepts either an array of ids or a single string.
documentIds?: string[] | string
}
/** Response carrying a job id and that job's status string. */
export type BatchImportResponse = {
job_id: string
job_status: string
}