Feat: Q&A format segmentation support (#668)

Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: StyleZhang <jasonapring2015@outlook.com>
This commit is contained in:
KVOJJJin
2023-07-28 20:47:15 +08:00
committed by GitHub
parent aae2fb8a30
commit cf93d8d6e2
52 changed files with 2038 additions and 274 deletions

View File

@@ -206,6 +206,8 @@ class Document(db.Model):
server_default=db.text('CURRENT_TIMESTAMP(0)'))
doc_type = db.Column(db.String(40), nullable=True)
doc_metadata = db.Column(db.JSON, nullable=True)
doc_form = db.Column(db.String(
255), nullable=False, server_default=db.text("'text_model'::character varying"))
DATA_SOURCES = ['upload_file', 'notion_import']
@@ -308,6 +310,7 @@ class DocumentSegment(db.Model):
document_id = db.Column(UUID, nullable=False)
position = db.Column(db.Integer, nullable=False)
content = db.Column(db.Text, nullable=False)
answer = db.Column(db.Text, nullable=True)
word_count = db.Column(db.Integer, nullable=False)
tokens = db.Column(db.Integer, nullable=False)
@@ -327,6 +330,9 @@ class DocumentSegment(db.Model):
created_by = db.Column(UUID, nullable=False)
created_at = db.Column(db.DateTime, nullable=False,
server_default=db.text('CURRENT_TIMESTAMP(0)'))
updated_by = db.Column(UUID, nullable=True)
updated_at = db.Column(db.DateTime, nullable=False,
server_default=db.text('CURRENT_TIMESTAMP(0)'))
indexing_at = db.Column(db.DateTime, nullable=True)
completed_at = db.Column(db.DateTime, nullable=True)
error = db.Column(db.Text, nullable=True)
@@ -442,4 +448,4 @@ class Embedding(db.Model):
self.embedding = pickle.dumps(embedding_data, protocol=pickle.HIGHEST_PROTOCOL)
def get_embedding(self) -> list[float]:
return pickle.loads(self.embedding)
return pickle.loads(self.embedding)