feat: introduce new env ALLOW_UNSAFE_DATA_SCHEME to allow rendering data uri scheme (#21321)

This commit is contained in:
kurokobo
2025-07-09 11:12:40 +09:00
committed by GitHub
parent 521488f926
commit e39236186d
10 changed files with 22 additions and 1 deletions

View File

@@ -32,6 +32,9 @@ NEXT_PUBLIC_CSP_WHITELIST=
# Default is not allow to embed into iframe to prevent Clickjacking: https://owasp.org/www-community/attacks/Clickjacking
NEXT_PUBLIC_ALLOW_EMBED=
# Allow rendering unsafe URLs which have "data:" scheme.
NEXT_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME=false
# Github Access Token, used for invoking Github API
NEXT_PUBLIC_GITHUB_ACCESS_TOKEN=
# The maximum number of top-k value for RAG.

View File

@@ -1,3 +1,7 @@
import { ALLOW_UNSAFE_DATA_SCHEME } from '@/config'
export const isValidUrl = (url: string): boolean => {
return ['http:', 'https:', '//', 'mailto:'].some(prefix => url.startsWith(prefix))
const validPrefixes = ['http:', 'https:', '//', 'mailto:']
if (ALLOW_UNSAFE_DATA_SCHEME) validPrefixes.push('data:')
return validPrefixes.some(prefix => url.startsWith(prefix))
}

View File

@@ -4,6 +4,7 @@
* Includes preprocessing for LaTeX and custom "think" tags.
*/
import { flow } from 'lodash-es'
import { ALLOW_UNSAFE_DATA_SCHEME } from '@/config'
export const preprocessLaTeX = (content: string) => {
if (typeof content !== 'string')
@@ -86,5 +87,8 @@ export const customUrlTransform = (uri: string): string | undefined => {
if (PERMITTED_SCHEME_REGEX.test(scheme))
return uri
if (ALLOW_UNSAFE_DATA_SCHEME && scheme === 'data:')
return uri
return undefined
}

View File

@@ -44,6 +44,7 @@ const LocaleLayout = async ({
[DatasetAttr.DATA_PUBLIC_LOOP_NODE_MAX_COUNT]: process.env.NEXT_PUBLIC_LOOP_NODE_MAX_COUNT,
[DatasetAttr.DATA_PUBLIC_MAX_ITERATIONS_NUM]: process.env.NEXT_PUBLIC_MAX_ITERATIONS_NUM,
[DatasetAttr.DATA_PUBLIC_MAX_TREE_DEPTH]: process.env.NEXT_PUBLIC_MAX_TREE_DEPTH,
[DatasetAttr.DATA_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME]: process.env.NEXT_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME,
[DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_JINAREADER]: process.env.NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER,
[DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_FIRECRAWL]: process.env.NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL,
[DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_WATERCRAWL]: process.env.NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL,

View File

@@ -270,6 +270,7 @@ export const LOOP_NODE_MAX_COUNT = getNumberConfig(process.env.NEXT_PUBLIC_LOOP_
export const MAX_ITERATIONS_NUM = getNumberConfig(process.env.NEXT_PUBLIC_MAX_ITERATIONS_NUM, DatasetAttr.DATA_PUBLIC_MAX_ITERATIONS_NUM, 99)
export const MAX_TREE_DEPTH = getNumberConfig(process.env.NEXT_PUBLIC_MAX_TREE_DEPTH, DatasetAttr.DATA_PUBLIC_MAX_TREE_DEPTH, 50)
export const ALLOW_UNSAFE_DATA_SCHEME = getBooleanConfig(process.env.NEXT_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME, DatasetAttr.DATA_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME, false)
export const ENABLE_WEBSITE_JINAREADER = getBooleanConfig(process.env.NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER, DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_JINAREADER, true)
export const ENABLE_WEBSITE_FIRECRAWL = getBooleanConfig(process.env.NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL, DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_FIRECRAWL, true)
export const ENABLE_WEBSITE_WATERCRAWL = getBooleanConfig(process.env.NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL, DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_WATERCRAWL, false)

View File

@@ -26,6 +26,7 @@ export NEXT_TELEMETRY_DISABLED=${NEXT_TELEMETRY_DISABLED}
export NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS=${TEXT_GENERATION_TIMEOUT_MS}
export NEXT_PUBLIC_CSP_WHITELIST=${CSP_WHITELIST}
export NEXT_PUBLIC_ALLOW_EMBED=${ALLOW_EMBED}
export NEXT_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME=${ALLOW_UNSAFE_DATA_SCHEME:-false}
export NEXT_PUBLIC_TOP_K_MAX_VALUE=${TOP_K_MAX_VALUE}
export NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH}
export NEXT_PUBLIC_MAX_TOOLS_NUM=${MAX_TOOLS_NUM}

View File

@@ -116,6 +116,7 @@ export enum DatasetAttr {
DATA_PUBLIC_LOOP_NODE_MAX_COUNT = 'data-public-loop-node-max-count',
DATA_PUBLIC_MAX_ITERATIONS_NUM = 'data-public-max-iterations-num',
DATA_PUBLIC_MAX_TREE_DEPTH = 'data-public-max-tree-depth',
DATA_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME = 'data-public-allow-unsafe-data-scheme',
DATA_PUBLIC_ENABLE_WEBSITE_JINAREADER = 'data-public-enable-website-jinareader',
DATA_PUBLIC_ENABLE_WEBSITE_FIRECRAWL = 'data-public-enable-website-firecrawl',
DATA_PUBLIC_ENABLE_WEBSITE_WATERCRAWL = 'data-public-enable-website-watercrawl',