Feat: Support re-segmentation (#114)

Co-authored-by: John Wang <takatost@gmail.com>
Co-authored-by: Jyong <718720800@qq.com>
Co-authored-by: 金伟强 <iamjoel007@gmail.com>
This commit is contained in:
KVOJJJin
2023-06-01 23:19:36 +08:00
committed by GitHub
parent f65a3ad1cc
commit c67f626b66
61 changed files with 1166 additions and 759 deletions

View File

@@ -208,9 +208,10 @@ class DatasetDocumentListApi(Resource):
parser = reqparse.RequestParser()
parser.add_argument('indexing_technique', type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False,
location='json')
parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
parser.add_argument('data_source', type=dict, required=False, location='json')
parser.add_argument('process_rule', type=dict, required=False, location='json')
parser.add_argument('duplicate', type=bool, nullable=False, location='json')
parser.add_argument('original_document_id', type=str, required=False, location='json')
args = parser.parse_args()
if not dataset.indexing_technique and not args['indexing_technique']:
@@ -347,10 +348,12 @@ class DocumentIndexingStatusApi(DocumentResource):
completed_segments = DocumentSegment.query \
.filter(DocumentSegment.completed_at.isnot(None),
DocumentSegment.document_id == str(document_id)) \
DocumentSegment.document_id == str(document_id),
DocumentSegment.status != 're_segment') \
.count()
total_segments = DocumentSegment.query \
.filter_by(document_id=str(document_id)) \
.filter(DocumentSegment.document_id == str(document_id),
DocumentSegment.status != 're_segment') \
.count()
document.completed_segments = completed_segments