fix: drop dead code phase2 unused class (#22042)

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Author: yihong
Committed: 2025-07-17 09:33:07 +08:00 (committed by GitHub)
Parent: 3587bd4040
Commit: d2933c2bfe
23 changed files with 1 addition and 576 deletions

View File

@@ -25,12 +25,6 @@ class UnsupportedFileTypeError(BaseHTTPException):
    code = 415


class HighQualityDatasetOnlyError(BaseHTTPException):
    error_code = "high_quality_dataset_only"
    description = "Current operation only supports 'high-quality' datasets."
    code = 400


class DatasetNotInitializedError(BaseHTTPException):
    error_code = "dataset_not_initialized"
    description = "The dataset is still being initialized or indexing. Please wait a moment."

View File

@@ -13,12 +13,6 @@ class CurrentPasswordIncorrectError(BaseHTTPException):
    code = 400


class ProviderRequestFailedError(BaseHTTPException):
    error_code = "provider_request_failed"
    description = None
    code = 400


class InvalidInvitationCodeError(BaseHTTPException):
    error_code = "invalid_invitation_code"
    description = "Invalid invitation code."

View File

@@ -25,12 +25,6 @@ class UnsupportedFileTypeError(BaseHTTPException):
    code = 415


class HighQualityDatasetOnlyError(BaseHTTPException):
    error_code = "high_quality_dataset_only"
    description = "Current operation only supports 'high-quality' datasets."
    code = 400


class DatasetNotInitializedError(BaseHTTPException):
    error_code = "dataset_not_initialized"
    description = "The dataset is still being initialized or indexing. Please wait a moment."

View File

@@ -10,8 +10,3 @@ class RecordNotFoundError(TaskPipilineError):
class WorkflowRunNotFoundError(RecordNotFoundError):
    def __init__(self, workflow_run_id: str):
        super().__init__("WorkflowRun", workflow_run_id)


class WorkflowNodeExecutionNotFoundError(RecordNotFoundError):
    def __init__(self, workflow_node_execution_id: str):
        super().__init__("WorkflowNodeExecution", workflow_node_execution_id)

View File

@@ -7,13 +7,6 @@ if TYPE_CHECKING:
_tool_file_manager_factory: Callable[[], "ToolFileManager"] | None = None


class ToolFileParser:
    @staticmethod
    def get_tool_file_manager() -> "ToolFileManager":
        assert _tool_file_manager_factory is not None
        return _tool_file_manager_factory()


def set_tool_file_manager_factory(factory: Callable[[], "ToolFileManager"]) -> None:
    global _tool_file_manager_factory
    _tool_file_manager_factory = factory
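
The removed helper was a late-binding factory used to obtain a ToolFileManager without importing it at module load time. A minimal sketch of how such an indirection would be wired up; the factory body and the ToolFileManager import path are assumptions for illustration, not taken from this commit:

def _build_tool_file_manager() -> "ToolFileManager":
    # Import deferred to avoid the circular import the indirection was designed to break
    # (module path assumed).
    from core.tools.tool_file_manager import ToolFileManager

    return ToolFileManager()  # constructor arguments, if any, omitted here


set_tool_file_manager_factory(_build_tool_file_manager)

manager = ToolFileParser.get_tool_file_manager()  # resolves lazily via the registered factory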

View File

@@ -1,52 +0,0 @@
import base64
import hashlib
import hmac
import os
import time

from pydantic import BaseModel, Field

from configs import dify_config


class SignedUrlParams(BaseModel):
    sign_key: str = Field(..., description="The sign key")
    timestamp: str = Field(..., description="Timestamp")
    nonce: str = Field(..., description="Nonce")
    sign: str = Field(..., description="Signature")


class UrlSigner:
    @classmethod
    def get_signed_url(cls, url: str, sign_key: str, prefix: str) -> str:
        signed_url_params = cls.get_signed_url_params(sign_key, prefix)
        return (
            f"{url}?timestamp={signed_url_params.timestamp}"
            f"&nonce={signed_url_params.nonce}&sign={signed_url_params.sign}"
        )

    @classmethod
    def get_signed_url_params(cls, sign_key: str, prefix: str) -> SignedUrlParams:
        timestamp = str(int(time.time()))
        nonce = os.urandom(16).hex()
        sign = cls._sign(sign_key, timestamp, nonce, prefix)

        return SignedUrlParams(sign_key=sign_key, timestamp=timestamp, nonce=nonce, sign=sign)

    @classmethod
    def verify(cls, sign_key: str, timestamp: str, nonce: str, sign: str, prefix: str) -> bool:
        recalculated_sign = cls._sign(sign_key, timestamp, nonce, prefix)

        return sign == recalculated_sign

    @classmethod
    def _sign(cls, sign_key: str, timestamp: str, nonce: str, prefix: str) -> str:
        if not dify_config.SECRET_KEY:
            raise Exception("SECRET_KEY is not set")

        data_to_sign = f"{prefix}|{sign_key}|{timestamp}|{nonce}"
        secret_key = dify_config.SECRET_KEY.encode()
        sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
        encoded_sign = base64.urlsafe_b64encode(sign).decode()

        return encoded_sign
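
For reference, a minimal standalone sketch of the signing scheme the removed UrlSigner implemented: HMAC-SHA256 over "prefix|sign_key|timestamp|nonce", urlsafe-base64 encoded. The secret key, prefix, and sign key below are placeholders for illustration, not values from this commit:

import base64
import hashlib
import hmac
import os
import time

SECRET_KEY = b"example-secret"  # stand-in for dify_config.SECRET_KEY


def sign(prefix: str, sign_key: str, timestamp: str, nonce: str) -> str:
    data = f"{prefix}|{sign_key}|{timestamp}|{nonce}".encode()
    return base64.urlsafe_b64encode(hmac.new(SECRET_KEY, data, hashlib.sha256).digest()).decode()


timestamp = str(int(time.time()))
nonce = os.urandom(16).hex()
signature = sign("file-preview", "file-42", timestamp, nonce)
url = f"https://example.com/files/42?timestamp={timestamp}&nonce={nonce}&sign={signature}"

# Verification recomputes the signature from the query parameters and compares the two values.
assert hmac.compare_digest(signature, sign("file-preview", "file-42", timestamp, nonce))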

View File

@@ -135,17 +135,6 @@ class PluginEntity(PluginInstallation):
        return self


class GithubPackage(BaseModel):
    repo: str
    version: str
    package: str


class GithubVersion(BaseModel):
    repo: str
    version: str


class GenericProviderID:
    organization: str
    plugin_name: str

View File

@@ -1,12 +0,0 @@
"""Abstract interface for document clean implementations."""

from core.rag.cleaner.cleaner_base import BaseCleaner


class UnstructuredNonAsciiCharsCleaner(BaseCleaner):
    def clean(self, content) -> str:
        """clean document content."""
        from unstructured.cleaners.core import clean_extra_whitespace

        # Returns "ITEM 1A: RISK FACTORS"
        return clean_extra_whitespace(content)
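
The removed cleaner classes in this and the following hunks were thin wrappers over helpers from the unstructured package. A minimal sketch of the equivalent direct calls, with sample strings invented for illustration, assuming unstructured is installed:

from unstructured.cleaners.core import (
    clean_extra_whitespace,
    clean_non_ascii_chars,
    replace_unicode_quotes,
)

print(clean_extra_whitespace("ITEM 1A:     RISK FACTORS"))  # -> "ITEM 1A: RISK FACTORS"
print(clean_non_ascii_chars("\x88This text contains non-ascii characters!\x88"))
# -> "This text contains non-ascii characters!"
print(replace_unicode_quotes("\x93A lovely quote!\x94"))  # \x91-\x94 sequences -> proper quote characters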

View File

@@ -1,15 +0,0 @@
"""Abstract interface for document clean implementations."""

from core.rag.cleaner.cleaner_base import BaseCleaner


class UnstructuredGroupBrokenParagraphsCleaner(BaseCleaner):
    def clean(self, content) -> str:
        """clean document content."""
        import re

        from unstructured.cleaners.core import group_broken_paragraphs

        para_split_re = re.compile(r"(\s*\n\s*){3}")

        return group_broken_paragraphs(content, paragraph_split=para_split_re)

View File

@@ -1,12 +0,0 @@
"""Abstract interface for document clean implementations."""

from core.rag.cleaner.cleaner_base import BaseCleaner


class UnstructuredNonAsciiCharsCleaner(BaseCleaner):
    def clean(self, content) -> str:
        """clean document content."""
        from unstructured.cleaners.core import clean_non_ascii_chars

        # Returns "This text contains non-ascii characters!"
        return clean_non_ascii_chars(content)

View File

@@ -1,12 +0,0 @@
"""Abstract interface for document clean implementations."""

from core.rag.cleaner.cleaner_base import BaseCleaner


class UnstructuredNonAsciiCharsCleaner(BaseCleaner):
    def clean(self, content) -> str:
        """Replaces unicode quote characters, such as the \x91 character in a string."""
        from unstructured.cleaners.core import replace_unicode_quotes

        return replace_unicode_quotes(content)

View File

@@ -1,11 +0,0 @@
"""Abstract interface for document clean implementations."""

from core.rag.cleaner.cleaner_base import BaseCleaner


class UnstructuredTranslateTextCleaner(BaseCleaner):
    def clean(self, content) -> str:
        """clean document content."""
        from unstructured.cleaners.translate import translate_text

        return translate_text(content)

View File

@@ -1,17 +0,0 @@
from typing import Optional

from pydantic import BaseModel


class ClusterEntity(BaseModel):
    """
    Model Config Entity.
    """

    name: str
    cluster_id: str
    displayName: str
    region: str
    spendingLimit: Optional[int] = 1000
    version: str
    createdBy: str

View File

@@ -9,8 +9,7 @@ from __future__ import annotations
import contextlib
import mimetypes
from abc import ABC, abstractmethod
from collections.abc import Generator, Iterable, Mapping
from collections.abc import Generator, Mapping
from io import BufferedReader, BytesIO
from pathlib import Path, PurePath
from typing import Any, Optional, Union
@@ -143,21 +142,3 @@ class Blob(BaseModel):
        if self.source:
            str_repr += f" {self.source}"
        return str_repr


class BlobLoader(ABC):
    """Abstract interface for blob loaders implementation.

    Implementer should be able to load raw content from a datasource system according
    to some criteria and return the raw content lazily as a stream of blobs.
    """

    @abstractmethod
    def yield_blobs(
        self,
    ) -> Iterable[Blob]:
        """A lazy loader for raw data represented by Blob object.

        Returns:
            A generator over blobs
        """

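A minimal sketch of what an implementer of the now-removed BlobLoader interface would have looked like. The DirectoryBlobLoader class is hypothetical, and Blob.from_path is assumed to exist on this module's Blob class (as it does in the upstream LangChain implementation the file mirrors):

from collections.abc import Iterable
from pathlib import Path


class DirectoryBlobLoader(BlobLoader):
    """Lazily yield one Blob per file found under a directory (illustrative only)."""

    def __init__(self, root: str) -> None:
        self._root = Path(root)

    def yield_blobs(self) -> Iterable[Blob]:
        for path in self._root.rglob("*"):
            if path.is_file():
                yield Blob.from_path(path)  # constructor assumed, as in LangChain's Blob
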
View File

@@ -1,47 +0,0 @@
import logging

from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document

logger = logging.getLogger(__name__)


class UnstructuredPDFExtractor(BaseExtractor):
    """Load pdf files.

    Args:
        file_path: Path to the file to load.

        api_url: Unstructured API URL

        api_key: Unstructured API Key
    """

    def __init__(self, file_path: str, api_url: str, api_key: str):
        """Initialize with file path."""
        self._file_path = file_path
        self._api_url = api_url
        self._api_key = api_key

    def extract(self) -> list[Document]:
        if self._api_url:
            from unstructured.partition.api import partition_via_api

            elements = partition_via_api(
                filename=self._file_path, api_url=self._api_url, api_key=self._api_key, strategy="auto"
            )
        else:
            from unstructured.partition.pdf import partition_pdf

            elements = partition_pdf(filename=self._file_path, strategy="auto")

        from unstructured.chunking.title import chunk_by_title

        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
        documents = []
        for chunk in chunks:
            text = chunk.text.strip()
            documents.append(Document(page_content=text))

        return documents
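
A minimal sketch of how the removed extractor would have been invoked. The file path is a placeholder, and passing an empty api_url selects the local partition_pdf branch (which requires the unstructured PDF extras to be installed):

extractor = UnstructuredPDFExtractor(file_path="/tmp/report.pdf", api_url="", api_key="")
documents = extractor.extract()
for doc in documents[:3]:
    print(doc.page_content[:80])  # preview the first few title-based chunks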

View File

@@ -1,34 +0,0 @@
import logging

from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document

logger = logging.getLogger(__name__)


class UnstructuredTextExtractor(BaseExtractor):
    """Load msg files.

    Args:
        file_path: Path to the file to load.
    """

    def __init__(self, file_path: str, api_url: str):
        """Initialize with file path."""
        self._file_path = file_path
        self._api_url = api_url

    def extract(self) -> list[Document]:
        from unstructured.partition.text import partition_text

        elements = partition_text(filename=self._file_path)

        from unstructured.chunking.title import chunk_by_title

        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
        documents = []
        for chunk in chunks:
            text = chunk.text.strip()
            documents.append(Document(page_content=text))

        return documents

View File

@@ -10,7 +10,6 @@ from typing import (
    Any,
    Literal,
    Optional,
    TypedDict,
    TypeVar,
    Union,
)
@@ -168,167 +167,6 @@ class TextSplitter(BaseDocumentTransformer, ABC):
        raise NotImplementedError


class CharacterTextSplitter(TextSplitter):
    """Splitting text that looks at characters."""

    def __init__(self, separator: str = "\n\n", **kwargs: Any) -> None:
        """Create a new TextSplitter."""
        super().__init__(**kwargs)
        self._separator = separator

    def split_text(self, text: str) -> list[str]:
        """Split incoming text and return chunks."""
        # First we naively split the large input into a bunch of smaller ones.
        splits = _split_text_with_regex(text, self._separator, self._keep_separator)
        _separator = "" if self._keep_separator else self._separator
        _good_splits_lengths = []  # cache the lengths of the splits
        if splits:
            _good_splits_lengths.extend(self._length_function(splits))
        return self._merge_splits(splits, _separator, _good_splits_lengths)


class LineType(TypedDict):
    """Line type as typed dict."""

    metadata: dict[str, str]
    content: str


class HeaderType(TypedDict):
    """Header type as typed dict."""

    level: int
    name: str
    data: str


class MarkdownHeaderTextSplitter:
    """Splitting markdown files based on specified headers."""

    def __init__(self, headers_to_split_on: list[tuple[str, str]], return_each_line: bool = False):
        """Create a new MarkdownHeaderTextSplitter.

        Args:
            headers_to_split_on: Headers we want to track
            return_each_line: Return each line w/ associated headers
        """
        # Output line-by-line or aggregated into chunks w/ common headers
        self.return_each_line = return_each_line
        # Given the headers we want to split on,
        # (e.g., "#, ##, etc") order by length
        self.headers_to_split_on = sorted(headers_to_split_on, key=lambda split: len(split[0]), reverse=True)

    def aggregate_lines_to_chunks(self, lines: list[LineType]) -> list[Document]:
        """Combine lines with common metadata into chunks
        Args:
            lines: Line of text / associated header metadata
        """
        aggregated_chunks: list[LineType] = []

        for line in lines:
            if aggregated_chunks and aggregated_chunks[-1]["metadata"] == line["metadata"]:
                # If the last line in the aggregated list
                # has the same metadata as the current line,
                # append the current content to the last lines's content
                aggregated_chunks[-1]["content"] += " \n" + line["content"]
            else:
                # Otherwise, append the current line to the aggregated list
                aggregated_chunks.append(line)

        return [Document(page_content=chunk["content"], metadata=chunk["metadata"]) for chunk in aggregated_chunks]

    def split_text(self, text: str) -> list[Document]:
        """Split markdown file
        Args:
            text: Markdown file"""

        # Split the input text by newline character ("\n").
        lines = text.split("\n")
        # Final output
        lines_with_metadata: list[LineType] = []
        # Content and metadata of the chunk currently being processed
        current_content: list[str] = []
        current_metadata: dict[str, str] = {}
        # Keep track of the nested header structure
        # header_stack: List[Dict[str, Union[int, str]]] = []
        header_stack: list[HeaderType] = []
        initial_metadata: dict[str, str] = {}

        for line in lines:
            stripped_line = line.strip()
            # Check each line against each of the header types (e.g., #, ##)
            for sep, name in self.headers_to_split_on:
                # Check if line starts with a header that we intend to split on
                if stripped_line.startswith(sep) and (
                    # Header with no text OR header is followed by space
                    # Both are valid conditions that sep is being used a header
                    len(stripped_line) == len(sep) or stripped_line[len(sep)] == " "
                ):
                    # Ensure we are tracking the header as metadata
                    if name is not None:
                        # Get the current header level
                        current_header_level = sep.count("#")

                        # Pop out headers of lower or same level from the stack
                        while header_stack and header_stack[-1]["level"] >= current_header_level:
                            # We have encountered a new header
                            # at the same or higher level
                            popped_header = header_stack.pop()
                            # Clear the metadata for the
                            # popped header in initial_metadata
                            if popped_header["name"] in initial_metadata:
                                initial_metadata.pop(popped_header["name"])

                        # Push the current header to the stack
                        header: HeaderType = {
                            "level": current_header_level,
                            "name": name,
                            "data": stripped_line[len(sep) :].strip(),
                        }
                        header_stack.append(header)
                        # Update initial_metadata with the current header
                        initial_metadata[name] = header["data"]

                    # Add the previous line to the lines_with_metadata
                    # only if current_content is not empty
                    if current_content:
                        lines_with_metadata.append(
                            {
                                "content": "\n".join(current_content),
                                "metadata": current_metadata.copy(),
                            }
                        )
                        current_content.clear()

                    break
            else:
                if stripped_line:
                    current_content.append(stripped_line)
                elif current_content:
                    lines_with_metadata.append(
                        {
                            "content": "\n".join(current_content),
                            "metadata": current_metadata.copy(),
                        }
                    )
                    current_content.clear()

            current_metadata = initial_metadata.copy()

        if current_content:
            lines_with_metadata.append({"content": "\n".join(current_content), "metadata": current_metadata})

        # lines_with_metadata has each line with associated header metadata
        # aggregate these into chunks based on common metadata
        if not self.return_each_line:
            return self.aggregate_lines_to_chunks(lines_with_metadata)
        else:
            return [
                Document(page_content=chunk["content"], metadata=chunk["metadata"]) for chunk in lines_with_metadata
            ]


# should be in newer Python versions (3.10+)
# @dataclass(frozen=True, kw_only=True, slots=True)
@dataclass(frozen=True)
class Tokenizer:
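
A minimal sketch of how the removed MarkdownHeaderTextSplitter was used. The header mapping and sample markdown are invented for illustration:

headers_to_split_on = [("#", "Header 1"), ("##", "Header 2")]
splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

docs = splitter.split_text("# Intro\nSome text.\n## Details\nMore text.")
for doc in docs:
    print(doc.metadata, doc.page_content)
# -> {'Header 1': 'Intro'} Some text.
# -> {'Header 1': 'Intro', 'Header 2': 'Details'} More text.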

View File

@@ -1,79 +0,0 @@
from typing import Optional

from pydantic import BaseModel

from core.app.entities.app_invoke_entities import InvokeFrom
from core.workflow.nodes.base import BaseIterationState, BaseLoopState, BaseNode
from models.enums import UserFrom
from models.workflow import Workflow, WorkflowType

from .node_entities import NodeRunResult
from .variable_pool import VariablePool


class WorkflowNodeAndResult:
    node: BaseNode
    result: Optional[NodeRunResult] = None

    def __init__(self, node: BaseNode, result: Optional[NodeRunResult] = None):
        self.node = node
        self.result = result


class WorkflowRunState:
    tenant_id: str
    app_id: str
    workflow_id: str
    workflow_type: WorkflowType
    user_id: str
    user_from: UserFrom
    invoke_from: InvokeFrom

    workflow_call_depth: int

    start_at: float
    variable_pool: VariablePool

    total_tokens: int = 0

    workflow_nodes_and_results: list[WorkflowNodeAndResult]

    class NodeRun(BaseModel):
        node_id: str
        iteration_node_id: str
        loop_node_id: str

    workflow_node_runs: list[NodeRun]
    workflow_node_steps: int

    current_iteration_state: Optional[BaseIterationState]
    current_loop_state: Optional[BaseLoopState]

    def __init__(
        self,
        workflow: Workflow,
        start_at: float,
        variable_pool: VariablePool,
        user_id: str,
        user_from: UserFrom,
        invoke_from: InvokeFrom,
        workflow_call_depth: int,
    ):
        self.workflow_id = workflow.id
        self.tenant_id = workflow.tenant_id
        self.app_id = workflow.app_id
        self.workflow_type = WorkflowType.value_of(workflow.type)
        self.user_id = user_id
        self.user_from = user_from
        self.invoke_from = invoke_from
        self.workflow_call_depth = workflow_call_depth

        self.start_at = start_at
        self.variable_pool = variable_pool
        self.total_tokens = 0

        self.workflow_node_steps = 1
        self.workflow_node_runs = []

        self.current_iteration_state = None
        self.current_loop_state = None

View File

@@ -1,4 +1,3 @@
import json
from collections.abc import Mapping
from typing import Any
@@ -8,18 +7,6 @@ from core.file.models import File
from core.variables import Segment


class WorkflowRuntimeTypeEncoder(json.JSONEncoder):
    def default(self, o: Any):
        if isinstance(o, Segment):
            return o.value
        elif isinstance(o, File):
            return o.to_dict()
        elif isinstance(o, BaseModel):
            return o.model_dump(mode="json")
        else:
            return super().default(o)


class WorkflowRuntimeTypeConverter:
    def to_json_encodable(self, value: Mapping[str, Any] | None) -> Mapping[str, Any] | None:
        result = self._to_json_encodable_recursive(value)

View File

@@ -148,25 +148,6 @@ class StrLen:
        return value


class FloatRange:
    """Restrict input to an float in a range (inclusive)"""

    def __init__(self, low, high, argument="argument"):
        self.low = low
        self.high = high
        self.argument = argument

    def __call__(self, value):
        value = _get_float(value)
        if value < self.low or value > self.high:
            error = "Invalid {arg}: {val}. {arg} must be within the range {lo} - {hi}".format(
                arg=self.argument, val=value, lo=self.low, hi=self.high
            )
            raise ValueError(error)
        return value


class DatetimeString:
    def __init__(self, format, argument="argument"):
        self.format = format
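
A minimal sketch of direct use of the removed FloatRange validator; in this module's style it would typically be passed as the type= callable of a request-argument parser. The argument name and bounds are placeholders:

temperature = FloatRange(0.0, 2.0, argument="temperature")

print(temperature("0.7"))  # -> 0.7, parsed via the module's _get_float helper
try:
    temperature("3.5")
except ValueError as e:
    print(e)  # Invalid temperature: 3.5. temperature must be within the range 0.0 - 2.0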

View File

@@ -1,11 +0,0 @@
import json

from pydantic import BaseModel


class PydanticModelEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, BaseModel):
            return o.model_dump()
        else:
            super().default(o)
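
A minimal usage sketch of the removed encoder with an invented model. (The fallback branch calls super().default(o) without returning it, but since the base implementation always raises TypeError for unknown types, behaviour is unchanged.)

import json

from pydantic import BaseModel


class User(BaseModel):  # example model, not from the codebase
    id: int
    name: str


payload = {"user": User(id=1, name="alice"), "ok": True}
print(json.dumps(payload, cls=PydanticModelEncoder))
# -> {"user": {"id": 1, "name": "alice"}, "ok": true}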

View File

@@ -610,14 +610,6 @@ class InstalledApp(Base):
        return tenant


class ConversationSource(StrEnum):
    """This enumeration is designed for use with `Conversation.from_source`."""

    # NOTE(QuantumGhost): The enumeration members may not cover all possible cases.
    API = "api"
    CONSOLE = "console"


class Conversation(Base):
    __tablename__ = "conversations"
    __table_args__ = (

View File

@@ -4,13 +4,6 @@ from typing import Literal, Optional
from pydantic import BaseModel


class SegmentUpdateEntity(BaseModel):
    content: str
    answer: Optional[str] = None
    keywords: Optional[list[str]] = None
    enabled: Optional[bool] = None


class ParentMode(StrEnum):
    FULL_DOC = "full-doc"
    PARAGRAPH = "paragraph"
@@ -153,10 +146,6 @@ class MetadataUpdateArgs(BaseModel):
    value: Optional[str | int | float] = None


class MetadataValueUpdateArgs(BaseModel):
    fields: list[MetadataUpdateArgs]


class MetadataDetail(BaseModel):
    id: str
    name: str