feat: Persist Variables for Enhanced Debugging Workflow (#20699)

This pull request introduces a feature aimed at improving the debugging experience during workflow editing. With the addition of variable persistence, the system will automatically retain the output variables from previously executed nodes. These persisted variables can then be reused when debugging subsequent nodes, eliminating the need for repetitive manual input.

By streamlining this part of the workflow, the feature reduces manual-input errors and debugging effort, offering a smoother and more efficient experience. A conceptual sketch follows the highlights below.

Key highlights of this change:

- Automatic persistence of output variables for executed nodes.
- Reuse of persisted variables to simplify input steps for nodes requiring them (e.g., `code`, `template`, `variable_assigner`).
- Enhanced debugging experience with reduced friction.
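
As an illustration only, the persist-and-reuse behavior described above might be modeled like the sketch below. `DraftVariablePool` and its methods are hypothetical names for this illustration, not identifiers from this PR.

```python
# Hypothetical sketch of the persist-and-reuse flow described above.
# `DraftVariablePool`, `persist_outputs`, and `load_persisted` are
# illustrative names, not the PR's actual API.
class DraftVariablePool:
    def __init__(self) -> None:
        self._store: dict[str, dict[str, object]] = {}

    def persist_outputs(self, node_id: str, outputs: dict[str, object]) -> None:
        # Retain the output variables of a node that has just executed.
        self._store[node_id] = outputs

    def load_persisted(self, node_id: str) -> dict[str, object] | None:
        # Reuse retained outputs when debugging a downstream node,
        # instead of asking the user to re-enter them.
        return self._store.get(node_id)
```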

Closes #19735.
Commit: 10b738a296 (parent: 3113350e51)
Author: QuantumGhost, 2025-06-24 09:05:29 +08:00, committed by GitHub
106 changed files with 6025 additions and 718 deletions


@@ -5,6 +5,7 @@ from typing import Any, cast
import httpx
from sqlalchemy import select
from sqlalchemy.orm import Session
from constants import AUDIO_EXTENSIONS, DOCUMENT_EXTENSIONS, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS
from core.file import File, FileBelongsTo, FileTransferMethod, FileType, FileUploadConfig, helpers
@@ -91,6 +92,8 @@ def build_from_mappings(
tenant_id: str,
strict_type_validation: bool = False,
) -> Sequence[File]:
# TODO(QuantumGhost): Performance concern - each mapping triggers a separate database query.
# Implement batch processing to reduce database load when handling multiple files.
files = [
build_from_mapping(
mapping=mapping,
@@ -377,3 +380,75 @@ def _get_file_type_by_mimetype(mime_type: str) -> FileType | None:
def get_file_type_by_mime_type(mime_type: str) -> FileType:
return _get_file_type_by_mimetype(mime_type) or FileType.CUSTOM
class StorageKeyLoader:
"""FileKeyLoader load the storage key from database for a list of files.
This loader is batched, the
"""
def __init__(self, session: Session, tenant_id: str) -> None:
self._session = session
self._tenant_id = tenant_id
def _load_upload_files(self, upload_file_ids: Sequence[uuid.UUID]) -> Mapping[uuid.UUID, UploadFile]:
stmt = select(UploadFile).where(
UploadFile.id.in_(upload_file_ids),
UploadFile.tenant_id == self._tenant_id,
)
return {uuid.UUID(i.id): i for i in self._session.scalars(stmt)}
def _load_tool_files(self, tool_file_ids: Sequence[uuid.UUID]) -> Mapping[uuid.UUID, ToolFile]:
stmt = select(ToolFile).where(
ToolFile.id.in_(tool_file_ids),
ToolFile.tenant_id == self._tenant_id,
)
return {uuid.UUID(i.id): i for i in self._session.scalars(stmt)}
    def load_storage_keys(self, files: Sequence[File]) -> None:
"""Loads storage keys for a sequence of files by retrieving the corresponding
`UploadFile` or `ToolFile` records from the database based on their transfer method.
This method doesn't modify the input sequence structure but updates the `_storage_key`
property of each file object by extracting the relevant key from its database record.
Performance note: This is a batched operation where database query count remains constant
regardless of input size. However, for optimal performance, input sequences should contain
fewer than 1000 files. For larger collections, split into smaller batches and process each
batch separately.
"""
upload_file_ids: list[uuid.UUID] = []
tool_file_ids: list[uuid.UUID] = []
for file in files:
            related_model_id = file.related_id
            if related_model_id is None:
                raise ValueError("file related_id should not be None.")
if file.tenant_id != self._tenant_id:
err_msg = (
f"invalid file, expected tenant_id={self._tenant_id}, "
f"got tenant_id={file.tenant_id}, file_id={file.id}, related_model_id={related_model_id}"
)
raise ValueError(err_msg)
model_id = uuid.UUID(related_model_id)
if file.transfer_method in (FileTransferMethod.LOCAL_FILE, FileTransferMethod.REMOTE_URL):
upload_file_ids.append(model_id)
elif file.transfer_method == FileTransferMethod.TOOL_FILE:
tool_file_ids.append(model_id)
tool_files = self._load_tool_files(tool_file_ids)
upload_files = self._load_upload_files(upload_file_ids)
for file in files:
model_id = uuid.UUID(file.related_id)
if file.transfer_method in (FileTransferMethod.LOCAL_FILE, FileTransferMethod.REMOTE_URL):
upload_file_row = upload_files.get(model_id)
if upload_file_row is None:
                    raise ValueError(f"Upload file not found for id: {model_id}")
file._storage_key = upload_file_row.key
elif file.transfer_method == FileTransferMethod.TOOL_FILE:
tool_file_row = tool_files.get(model_id)
if tool_file_row is None:
                    raise ValueError(f"Tool file not found for id: {model_id}")
file._storage_key = tool_file_row.file_key
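
As a usage sketch for `StorageKeyLoader`, the helper below splits large collections into batches per the docstring's guidance. `load_all_storage_keys` is a hypothetical name, and `session`, `tenant_id`, and `files` are assumed to come from the caller.

```python
from itertools import islice

def load_all_storage_keys(session: Session, tenant_id: str, files: Sequence[File]) -> None:
    # Hypothetical helper: each load_storage_keys() call issues a constant
    # number of queries, but the docstring recommends batches under 1000 files.
    loader = StorageKeyLoader(session, tenant_id)
    it = iter(files)
    while batch := list(islice(it, 1000)):
        loader.load_storage_keys(batch)
```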


@@ -43,6 +43,10 @@ class UnsupportedSegmentTypeError(Exception):
pass
class TypeMismatchError(Exception):
pass
# Maps each segment class to its corresponding variable class.
SEGMENT_TO_VARIABLE_MAP = {
StringSegment: StringVariable,
@@ -110,6 +114,10 @@ def _build_variable_from_mapping(*, mapping: Mapping[str, Any], selector: Sequen
return cast(Variable, result)
def infer_segment_type_from_value(value: Any, /) -> SegmentType:
return build_segment(value).value_type
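
For illustration, since `infer_segment_type_from_value` simply delegates to `build_segment`, it should behave roughly as follows (expected results, not output captured from this codebase):

```python
# Expected behavior, assuming the standard SegmentType members:
infer_segment_type_from_value("hello")   # SegmentType.STRING
infer_segment_type_from_value(3.14)      # SegmentType.NUMBER
infer_segment_type_from_value(None)      # SegmentType.NONE
```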
def build_segment(value: Any, /) -> Segment:
if value is None:
return NoneSegment()
@@ -140,10 +148,80 @@ def build_segment(value: Any, /) -> Segment:
case SegmentType.NONE:
return ArrayAnySegment(value=value)
case _:
# This should be unreachable.
raise ValueError(f"not supported value {value}")
raise ValueError(f"not supported value {value}")
def build_segment_with_type(segment_type: SegmentType, value: Any) -> Segment:
"""
Build a segment with explicit type checking.
This function creates a segment from a value while enforcing type compatibility
with the specified segment_type. It provides stricter type validation compared
to the standard build_segment function.
Args:
segment_type: The expected SegmentType for the resulting segment
value: The value to be converted into a segment
Returns:
Segment: A segment instance of the appropriate type
Raises:
TypeMismatchError: If the value type doesn't match the expected segment_type
Special Cases:
- For empty list [] values, if segment_type is array[*], returns the corresponding array type
- Type validation is performed before segment creation
Examples:
>>> build_segment_with_type(SegmentType.STRING, "hello")
StringSegment(value="hello")
>>> build_segment_with_type(SegmentType.ARRAY_STRING, [])
ArrayStringSegment(value=[])
>>> build_segment_with_type(SegmentType.STRING, 123)
# Raises TypeMismatchError
"""
# Handle None values
if value is None:
if segment_type == SegmentType.NONE:
return NoneSegment()
else:
raise TypeMismatchError(f"Expected {segment_type}, but got None")
# Handle empty list special case for array types
if isinstance(value, list) and len(value) == 0:
if segment_type == SegmentType.ARRAY_ANY:
return ArrayAnySegment(value=value)
elif segment_type == SegmentType.ARRAY_STRING:
return ArrayStringSegment(value=value)
elif segment_type == SegmentType.ARRAY_NUMBER:
return ArrayNumberSegment(value=value)
elif segment_type == SegmentType.ARRAY_OBJECT:
return ArrayObjectSegment(value=value)
elif segment_type == SegmentType.ARRAY_FILE:
return ArrayFileSegment(value=value)
else:
raise TypeMismatchError(f"Expected {segment_type}, but got empty list")
# Build segment using existing logic to infer actual type
inferred_segment = build_segment(value)
inferred_type = inferred_segment.value_type
# Type compatibility checking
if inferred_type == segment_type:
return inferred_segment
# Type mismatch - raise error with descriptive message
raise TypeMismatchError(
f"Type mismatch: expected {segment_type}, but value '{value}' "
f"(type: {type(value).__name__}) corresponds to {inferred_type}"
)
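
A short usage sketch of `build_segment_with_type`, mirroring the docstring's examples:

```python
# Mirrors the docstring: the empty-list special case and a type mismatch.
seg = build_segment_with_type(SegmentType.ARRAY_STRING, [])  # ArrayStringSegment(value=[])

try:
    build_segment_with_type(SegmentType.STRING, 123)
except TypeMismatchError as exc:
    print(exc)  # Type mismatch: expected SegmentType.STRING, ...
```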
def segment_to_variable(
*,
segment: Segment,