Support extracting images and tables from docx (#4619)

This commit is contained in:
Jyong
2024-05-23 18:05:23 +08:00
committed by GitHub
parent 5893ebec55
commit 233c4150d1
10 changed files with 163 additions and 23 deletions

View File

@@ -144,9 +144,9 @@ class DatasetRetrieval:
float('inf')))
for segment in sorted_segments:
if segment.answer:
document_context_list.append(f'question:{segment.content} answer:{segment.answer}')
document_context_list.append(f'question:{segment.get_sign_content()} answer:{segment.answer}')
else:
document_context_list.append(segment.content)
document_context_list.append(segment.get_sign_content())
if show_retrieve_source:
context_list = []
resource_number = 1