diff --git a/docker/.env.example b/docker/.env.example index e7dbecb41..a403f25cb 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -826,6 +826,9 @@ MAX_ITERATIONS_NUM=99 # The timeout for the text generation in millisecond TEXT_GENERATION_TIMEOUT_MS=60000 +# Allow rendering unsafe URLs which have "data:" scheme. +ALLOW_UNSAFE_DATA_SCHEME=false + # ------------------------------ # Environment Variables for db Service # ------------------------------ diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index a34f96e94..fd7c78c7e 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -67,6 +67,7 @@ services: TEXT_GENERATION_TIMEOUT_MS: ${TEXT_GENERATION_TIMEOUT_MS:-60000} CSP_WHITELIST: ${CSP_WHITELIST:-} ALLOW_EMBED: ${ALLOW_EMBED:-false} + ALLOW_UNSAFE_DATA_SCHEME: ${ALLOW_UNSAFE_DATA_SCHEME:-false} MARKETPLACE_API_URL: ${MARKETPLACE_API_URL:-https://marketplace.dify.ai} MARKETPLACE_URL: ${MARKETPLACE_URL:-https://marketplace.dify.ai} TOP_K_MAX_VALUE: ${TOP_K_MAX_VALUE:-} diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index e48b5afd8..0a95251ff 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -364,6 +364,7 @@ x-shared-env: &shared-api-worker-env MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10} MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-99} TEXT_GENERATION_TIMEOUT_MS: ${TEXT_GENERATION_TIMEOUT_MS:-60000} + ALLOW_UNSAFE_DATA_SCHEME: ${ALLOW_UNSAFE_DATA_SCHEME:-false} POSTGRES_USER: ${POSTGRES_USER:-${DB_USERNAME}} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-${DB_PASSWORD}} POSTGRES_DB: ${POSTGRES_DB:-${DB_DATABASE}} @@ -582,6 +583,7 @@ services: TEXT_GENERATION_TIMEOUT_MS: ${TEXT_GENERATION_TIMEOUT_MS:-60000} CSP_WHITELIST: ${CSP_WHITELIST:-} ALLOW_EMBED: ${ALLOW_EMBED:-false} + ALLOW_UNSAFE_DATA_SCHEME: ${ALLOW_UNSAFE_DATA_SCHEME:-false} MARKETPLACE_API_URL: ${MARKETPLACE_API_URL:-https://marketplace.dify.ai} MARKETPLACE_URL: ${MARKETPLACE_URL:-https://marketplace.dify.ai} TOP_K_MAX_VALUE: ${TOP_K_MAX_VALUE:-} diff --git a/web/.env.example b/web/.env.example index c30064ffe..37bfc939e 100644 --- a/web/.env.example +++ b/web/.env.example @@ -32,6 +32,9 @@ NEXT_PUBLIC_CSP_WHITELIST= # Default is not allow to embed into iframe to prevent Clickjacking: https://owasp.org/www-community/attacks/Clickjacking NEXT_PUBLIC_ALLOW_EMBED= +# Allow rendering unsafe URLs which have "data:" scheme. +NEXT_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME=false + # Github Access Token, used for invoking Github API NEXT_PUBLIC_GITHUB_ACCESS_TOKEN= # The maximum number of top-k value for RAG. diff --git a/web/app/components/base/markdown-blocks/utils.ts b/web/app/components/base/markdown-blocks/utils.ts index 4e9e98dbe..d8df76aef 100644 --- a/web/app/components/base/markdown-blocks/utils.ts +++ b/web/app/components/base/markdown-blocks/utils.ts @@ -1,3 +1,7 @@ +import { ALLOW_UNSAFE_DATA_SCHEME } from '@/config' + export const isValidUrl = (url: string): boolean => { - return ['http:', 'https:', '//', 'mailto:'].some(prefix => url.startsWith(prefix)) + const validPrefixes = ['http:', 'https:', '//', 'mailto:'] + if (ALLOW_UNSAFE_DATA_SCHEME) validPrefixes.push('data:') + return validPrefixes.some(prefix => url.startsWith(prefix)) } diff --git a/web/app/components/base/markdown/markdown-utils.ts b/web/app/components/base/markdown/markdown-utils.ts index 209fcd0b3..0089bef0a 100644 --- a/web/app/components/base/markdown/markdown-utils.ts +++ b/web/app/components/base/markdown/markdown-utils.ts @@ -4,6 +4,7 @@ * Includes preprocessing for LaTeX and custom "think" tags. */ import { flow } from 'lodash-es' +import { ALLOW_UNSAFE_DATA_SCHEME } from '@/config' export const preprocessLaTeX = (content: string) => { if (typeof content !== 'string') @@ -86,5 +87,8 @@ export const customUrlTransform = (uri: string): string | undefined => { if (PERMITTED_SCHEME_REGEX.test(scheme)) return uri + if (ALLOW_UNSAFE_DATA_SCHEME && scheme === 'data:') + return uri + return undefined } diff --git a/web/app/layout.tsx b/web/app/layout.tsx index f3b2ca7f7..525445db3 100644 --- a/web/app/layout.tsx +++ b/web/app/layout.tsx @@ -44,6 +44,7 @@ const LocaleLayout = async ({ [DatasetAttr.DATA_PUBLIC_LOOP_NODE_MAX_COUNT]: process.env.NEXT_PUBLIC_LOOP_NODE_MAX_COUNT, [DatasetAttr.DATA_PUBLIC_MAX_ITERATIONS_NUM]: process.env.NEXT_PUBLIC_MAX_ITERATIONS_NUM, [DatasetAttr.DATA_PUBLIC_MAX_TREE_DEPTH]: process.env.NEXT_PUBLIC_MAX_TREE_DEPTH, + [DatasetAttr.DATA_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME]: process.env.NEXT_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME, [DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_JINAREADER]: process.env.NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER, [DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_FIRECRAWL]: process.env.NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL, [DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_WATERCRAWL]: process.env.NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL, diff --git a/web/config/index.ts b/web/config/index.ts index daaed412d..af9a21e60 100644 --- a/web/config/index.ts +++ b/web/config/index.ts @@ -270,6 +270,7 @@ export const LOOP_NODE_MAX_COUNT = getNumberConfig(process.env.NEXT_PUBLIC_LOOP_ export const MAX_ITERATIONS_NUM = getNumberConfig(process.env.NEXT_PUBLIC_MAX_ITERATIONS_NUM, DatasetAttr.DATA_PUBLIC_MAX_ITERATIONS_NUM, 99) export const MAX_TREE_DEPTH = getNumberConfig(process.env.NEXT_PUBLIC_MAX_TREE_DEPTH, DatasetAttr.DATA_PUBLIC_MAX_TREE_DEPTH, 50) +export const ALLOW_UNSAFE_DATA_SCHEME = getBooleanConfig(process.env.NEXT_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME, DatasetAttr.DATA_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME, false) export const ENABLE_WEBSITE_JINAREADER = getBooleanConfig(process.env.NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER, DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_JINAREADER, true) export const ENABLE_WEBSITE_FIRECRAWL = getBooleanConfig(process.env.NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL, DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_FIRECRAWL, true) export const ENABLE_WEBSITE_WATERCRAWL = getBooleanConfig(process.env.NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL, DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_WATERCRAWL, false) diff --git a/web/docker/entrypoint.sh b/web/docker/entrypoint.sh index c9acbdd42..ef13011a7 100755 --- a/web/docker/entrypoint.sh +++ b/web/docker/entrypoint.sh @@ -26,6 +26,7 @@ export NEXT_TELEMETRY_DISABLED=${NEXT_TELEMETRY_DISABLED} export NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS=${TEXT_GENERATION_TIMEOUT_MS} export NEXT_PUBLIC_CSP_WHITELIST=${CSP_WHITELIST} export NEXT_PUBLIC_ALLOW_EMBED=${ALLOW_EMBED} +export NEXT_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME=${ALLOW_UNSAFE_DATA_SCHEME:-false} export NEXT_PUBLIC_TOP_K_MAX_VALUE=${TOP_K_MAX_VALUE} export NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH} export NEXT_PUBLIC_MAX_TOOLS_NUM=${MAX_TOOLS_NUM} diff --git a/web/types/feature.ts b/web/types/feature.ts index b2a80dbea..5787c2661 100644 --- a/web/types/feature.ts +++ b/web/types/feature.ts @@ -116,6 +116,7 @@ export enum DatasetAttr { DATA_PUBLIC_LOOP_NODE_MAX_COUNT = 'data-public-loop-node-max-count', DATA_PUBLIC_MAX_ITERATIONS_NUM = 'data-public-max-iterations-num', DATA_PUBLIC_MAX_TREE_DEPTH = 'data-public-max-tree-depth', + DATA_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME = 'data-public-allow-unsafe-data-scheme', DATA_PUBLIC_ENABLE_WEBSITE_JINAREADER = 'data-public-enable-website-jinareader', DATA_PUBLIC_ENABLE_WEBSITE_FIRECRAWL = 'data-public-enable-website-firecrawl', DATA_PUBLIC_ENABLE_WEBSITE_WATERCRAWL = 'data-public-enable-website-watercrawl',