chore: translate Chinese comments to English in ClickZetta Volume storage module (#25037)

This commit is contained in:
-LAN-
2025-09-03 10:57:58 +08:00
committed by GitHub
parent 60c5bdd62f
commit c3820f55f4
3 changed files with 205 additions and 205 deletions

View File

@@ -1,7 +1,7 @@
"""ClickZetta Volume文件生命周期管理
"""ClickZetta Volume file lifecycle management
该模块提供文件版本控制、自动清理、备份和恢复等生命周期管理功能。
支持知识库文件的完整生命周期管理。
This module provides file lifecycle management features including version control, automatic cleanup, backup and restore.
Supports complete lifecycle management for knowledge base files.
"""
import json
@@ -15,17 +15,17 @@ logger = logging.getLogger(__name__)
class FileStatus(Enum):
    """File status enumeration.

    Each member's value is the plain string form of the status
    (for example ``FileStatus.DELETED.value == "deleted"``).
    """

    ACTIVE = "active"  # Active status
    ARCHIVED = "archived"  # Archived
    DELETED = "deleted"  # Deleted (soft delete)
    BACKUP = "backup"  # Backup file
@dataclass
class FileMetadata:
"""文件元数据"""
"""File metadata"""
filename: str
size: int | None
@@ -38,7 +38,7 @@ class FileMetadata:
parent_version: Optional[int] = None
def to_dict(self) -> dict:
"""转换为字典格式"""
"""Convert to dictionary format"""
data = asdict(self)
data["created_at"] = self.created_at.isoformat()
data["modified_at"] = self.modified_at.isoformat()
@@ -47,7 +47,7 @@ class FileMetadata:
@classmethod
def from_dict(cls, data: dict) -> "FileMetadata":
"""从字典创建实例"""
"""Create instance from dictionary"""
data = data.copy()
data["created_at"] = datetime.fromisoformat(data["created_at"])
data["modified_at"] = datetime.fromisoformat(data["modified_at"])
@@ -56,14 +56,14 @@ class FileMetadata:
class FileLifecycleManager:
"""文件生命周期管理器"""
"""File lifecycle manager"""
def __init__(self, storage, dataset_id: Optional[str] = None):
"""初始化生命周期管理器
"""Initialize lifecycle manager
Args:
storage: ClickZetta Volume存储实例
dataset_id: 数据集ID用于Table Volume
storage: ClickZetta Volume storage instance
dataset_id: Dataset ID (for Table Volume)
"""
self._storage = storage
self._dataset_id = dataset_id
@@ -72,21 +72,21 @@ class FileLifecycleManager:
self._backup_prefix = ".backups/"
self._deleted_prefix = ".deleted/"
# 获取权限管理器(如果存在)
# Get permission manager (if exists)
self._permission_manager: Optional[Any] = getattr(storage, "_permission_manager", None)
def save_with_lifecycle(self, filename: str, data: bytes, tags: Optional[dict[str, str]] = None) -> FileMetadata:
"""保存文件并管理生命周期
"""Save file and manage lifecycle
Args:
filename: 文件名
data: 文件内容
tags: 文件标签
filename: File name
data: File content
tags: File tags
Returns:
文件元数据
File metadata
"""
# 权限检查
# Permission check
if not self._check_permission(filename, "save"):
from .volume_permissions import VolumePermissionError
@@ -98,28 +98,28 @@ class FileLifecycleManager:
)
try:
# 1. 检查是否存在旧版本
# 1. Check if old version exists
metadata_dict = self._load_metadata()
current_metadata = metadata_dict.get(filename)
# 2. 如果存在旧版本,创建版本备份
# 2. If old version exists, create version backup
if current_metadata:
self._create_version_backup(filename, current_metadata)
# 3. 计算文件信息
# 3. Calculate file information
now = datetime.now()
checksum = self._calculate_checksum(data)
new_version = (current_metadata["version"] + 1) if current_metadata else 1
# 4. 保存新文件
# 4. Save new file
self._storage.save(filename, data)
# 5. 创建元数据
# 5. Create metadata
created_at = now
parent_version = None
if current_metadata:
# 如果created_at是字符串,转换为datetime
# If created_at is string, convert to datetime
if isinstance(current_metadata["created_at"], str):
created_at = datetime.fromisoformat(current_metadata["created_at"])
else:
@@ -138,7 +138,7 @@ class FileLifecycleManager:
parent_version=parent_version,
)
# 6. 更新元数据
# 6. Update metadata
metadata_dict[filename] = file_metadata.to_dict()
self._save_metadata(metadata_dict)
@@ -150,13 +150,13 @@ class FileLifecycleManager:
raise
def get_file_metadata(self, filename: str) -> Optional[FileMetadata]:
"""获取文件元数据
"""Get file metadata
Args:
filename: 文件名
filename: File name
Returns:
文件元数据如果不存在返回None
File metadata, returns None if not exists
"""
try:
metadata_dict = self._load_metadata()
@@ -168,37 +168,37 @@ class FileLifecycleManager:
return None
def list_file_versions(self, filename: str) -> list[FileMetadata]:
"""列出文件的所有版本
"""List all versions of a file
Args:
filename: 文件名
filename: File name
Returns:
文件版本列表,按版本号排序
File version list, sorted by version number
"""
try:
versions = []
# 获取当前版本
# Get current version
current_metadata = self.get_file_metadata(filename)
if current_metadata:
versions.append(current_metadata)
# 获取历史版本
# Get historical versions
try:
version_files = self._storage.scan(self._dataset_id or "", files=True)
for file_path in version_files:
if file_path.startswith(f"{self._version_prefix}{filename}.v"):
# 解析版本号
# Parse version number
version_str = file_path.split(".v")[-1].split(".")[0]
try:
version_num = int(version_str)
# 这里简化处理,实际应该从版本文件中读取元数据
# 暂时创建基本的元数据信息
# Simplified processing here, should actually read metadata from version file
# Temporarily create basic metadata information
except ValueError:
continue
except:
# 如果无法扫描版本文件,只返回当前版本
# If cannot scan version files, only return current version
pass
return sorted(versions, key=lambda x: x.version or 0, reverse=True)
@@ -208,32 +208,32 @@ class FileLifecycleManager:
return []
def restore_version(self, filename: str, version: int) -> bool:
"""恢复文件到指定版本
"""Restore file to specified version
Args:
filename: 文件名
version: 要恢复的版本号
filename: File name
version: Version number to restore
Returns:
恢复是否成功
Whether restore succeeded
"""
try:
version_filename = f"{self._version_prefix}{filename}.v{version}"
# 检查版本文件是否存在
# Check if version file exists
if not self._storage.exists(version_filename):
logger.warning("Version %s of %s not found", version, filename)
return False
# 读取版本文件内容
# Read version file content
version_data = self._storage.load_once(version_filename)
# 保存当前版本为备份
# Save current version as backup
current_metadata = self.get_file_metadata(filename)
if current_metadata:
self._create_version_backup(filename, current_metadata.to_dict())
# 恢复文件
# Restore file
self.save_with_lifecycle(filename, version_data, {"restored_from": str(version)})
return True
@@ -242,21 +242,21 @@ class FileLifecycleManager:
return False
def archive_file(self, filename: str) -> bool:
"""归档文件
"""Archive file
Args:
filename: 文件名
filename: File name
Returns:
归档是否成功
Whether archive succeeded
"""
# 权限检查
# Permission check
if not self._check_permission(filename, "archive"):
logger.warning("Permission denied for archive operation on file: %s", filename)
return False
try:
# 更新文件状态为归档
# Update file status to archived
metadata_dict = self._load_metadata()
if filename not in metadata_dict:
logger.warning("File %s not found in metadata", filename)
@@ -275,36 +275,36 @@ class FileLifecycleManager:
return False
def soft_delete_file(self, filename: str) -> bool:
"""软删除文件(移动到删除目录)
"""Soft delete file (move to deleted directory)
Args:
filename: 文件名
filename: File name
Returns:
删除是否成功
Whether delete succeeded
"""
# 权限检查
# Permission check
if not self._check_permission(filename, "delete"):
logger.warning("Permission denied for soft delete operation on file: %s", filename)
return False
try:
# 检查文件是否存在
# Check if file exists
if not self._storage.exists(filename):
logger.warning("File %s not found", filename)
return False
# 读取文件内容
# Read file content
file_data = self._storage.load_once(filename)
# 移动到删除目录
# Move to deleted directory
deleted_filename = f"{self._deleted_prefix}{filename}.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
self._storage.save(deleted_filename, file_data)
# 删除原文件
# Delete original file
self._storage.delete(filename)
# 更新元数据
# Update metadata
metadata_dict = self._load_metadata()
if filename in metadata_dict:
metadata_dict[filename]["status"] = FileStatus.DELETED.value
@@ -319,27 +319,27 @@ class FileLifecycleManager:
return False
def cleanup_old_versions(self, max_versions: int = 5, max_age_days: int = 30) -> int:
"""清理旧版本文件
"""Cleanup old version files
Args:
max_versions: 保留的最大版本数
max_age_days: 版本文件的最大保留天数
max_versions: Maximum number of versions to keep
max_age_days: Maximum retention days for version files
Returns:
清理的文件数量
Number of files cleaned
"""
try:
cleaned_count = 0
# 获取所有版本文件
# Get all version files
try:
all_files = self._storage.scan(self._dataset_id or "", files=True)
version_files = [f for f in all_files if f.startswith(self._version_prefix)]
# 按文件分组
# Group by file
file_versions: dict[str, list[tuple[int, str]]] = {}
for version_file in version_files:
# 解析文件名和版本
# Parse filename and version
parts = version_file[len(self._version_prefix) :].split(".v")
if len(parts) >= 2:
base_filename = parts[0]
@@ -352,12 +352,12 @@ class FileLifecycleManager:
except ValueError:
continue
# 清理每个文件的旧版本
# Cleanup old versions for each file
for base_filename, versions in file_versions.items():
# 按版本号排序
# Sort by version number
versions.sort(key=lambda x: x[0], reverse=True)
# 保留最新的max_versions个版本删除其余的
# Keep the newest max_versions versions, delete the rest
if len(versions) > max_versions:
to_delete = versions[max_versions:]
for version_num, version_file in to_delete:
@@ -377,10 +377,10 @@ class FileLifecycleManager:
return 0
def get_storage_statistics(self) -> dict[str, Any]:
"""获取存储统计信息
"""Get storage statistics
Returns:
存储统计字典
Storage statistics dictionary
"""
try:
metadata_dict = self._load_metadata()
@@ -402,7 +402,7 @@ class FileLifecycleManager:
for filename, metadata in metadata_dict.items():
file_meta = FileMetadata.from_dict(metadata)
# 统计文件状态
# Count file status
if file_meta.status == FileStatus.ACTIVE:
stats["active_files"] = (stats["active_files"] or 0) + 1
elif file_meta.status == FileStatus.ARCHIVED:
@@ -410,13 +410,13 @@ class FileLifecycleManager:
elif file_meta.status == FileStatus.DELETED:
stats["deleted_files"] = (stats["deleted_files"] or 0) + 1
# 统计大小
# Count size
stats["total_size"] = (stats["total_size"] or 0) + (file_meta.size or 0)
# 统计版本
# Count versions
stats["versions_count"] = (stats["versions_count"] or 0) + (file_meta.version or 0)
# 找出最新和最旧的文件
# Find newest and oldest files
if oldest_date is None or file_meta.created_at < oldest_date:
oldest_date = file_meta.created_at
stats["oldest_file"] = filename
@@ -432,12 +432,12 @@ class FileLifecycleManager:
return {}
def _create_version_backup(self, filename: str, metadata: dict):
"""创建版本备份"""
"""Create version backup"""
try:
# 读取当前文件内容
# Read current file content
current_data = self._storage.load_once(filename)
# 保存为版本文件
# Save as version file
version_filename = f"{self._version_prefix}{filename}.v{metadata['version']}"
self._storage.save(version_filename, current_data)
@@ -447,7 +447,7 @@ class FileLifecycleManager:
logger.warning("Failed to create version backup for %s: %s", filename, e)
def _load_metadata(self) -> dict[str, Any]:
"""加载元数据文件"""
"""Load metadata file"""
try:
if self._storage.exists(self._metadata_file):
metadata_content = self._storage.load_once(self._metadata_file)
@@ -460,7 +460,7 @@ class FileLifecycleManager:
return {}
def _save_metadata(self, metadata_dict: dict):
"""保存元数据文件"""
"""Save metadata file"""
try:
metadata_content = json.dumps(metadata_dict, indent=2, ensure_ascii=False)
self._storage.save(self._metadata_file, metadata_content.encode("utf-8"))
@@ -470,45 +470,45 @@ class FileLifecycleManager:
raise
def _calculate_checksum(self, data: bytes) -> str:
"""计算文件校验和"""
"""Calculate file checksum"""
import hashlib
return hashlib.md5(data).hexdigest()
def _check_permission(self, filename: str, operation: str) -> bool:
"""检查文件操作权限
"""Check file operation permission
Args:
filename: 文件名
operation: 操作类型
filename: File name
operation: Operation type
Returns:
True if permission granted, False otherwise
"""
# 如果没有权限管理器,默认允许
# If no permission manager, allow by default
if not self._permission_manager:
return True
try:
# 根据操作类型映射到权限
# Map operation type to permission
operation_mapping = {
"save": "save",
"load": "load_once",
"delete": "delete",
"archive": "delete", # 归档需要删除权限
"restore": "save", # 恢复需要写权限
"cleanup": "delete", # 清理需要删除权限
"archive": "delete", # Archive requires delete permission
"restore": "save", # Restore requires write permission
"cleanup": "delete", # Cleanup requires delete permission
"read": "load_once",
"write": "save",
}
mapped_operation = operation_mapping.get(operation, operation)
# 检查权限
# Check permission
result = self._permission_manager.validate_operation(mapped_operation, self._dataset_id)
return bool(result)
except Exception as e:
logger.exception("Permission check failed for %s operation %s", filename, operation)
# 安全默认:权限检查失败时拒绝访问
# Safe default: deny access when permission check fails
return False