Feat elasticsearch japanese (#12194)

This commit is contained in:
Hiroshi Fujita
2025-01-08 13:35:41 +09:00
committed by GitHub
parent 6635c393e9
commit d2586278d6
7 changed files with 149 additions and 3 deletions

View File

@@ -383,7 +383,7 @@ SUPABASE_URL=your-server-url
# ------------------------------
# The type of vector store to use.
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`.
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`.
VECTOR_STORE=weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
@@ -512,7 +512,7 @@ TENCENT_VECTOR_DB_SHARD=1
TENCENT_VECTOR_DB_REPLICAS=2
# Elasticsearch configuration, only available when VECTOR_STORE is `elasticsearch` or `elasticsearch-ja`
ELASTICSEARCH_HOST=0.0.0.0
ELASTICSEARCH_HOST=elasticsearch
ELASTICSEARCH_PORT=9200
ELASTICSEARCH_USERNAME=elastic
ELASTICSEARCH_PASSWORD=elastic

View File

@@ -883,20 +883,28 @@ services:
container_name: elasticsearch
profiles:
- elasticsearch
- elasticsearch-ja
restart: always
volumes:
- ./elasticsearch/docker-entrypoint.sh:/docker-entrypoint-mount.sh
- dify_es01_data:/usr/share/elasticsearch/data
environment:
ELASTIC_PASSWORD: ${ELASTICSEARCH_PASSWORD:-elastic}
VECTOR_STORE: ${VECTOR_STORE:-}
cluster.name: dify-es-cluster
node.name: dify-es0
discovery.type: single-node
xpack.license.self_generated.type: trial
xpack.license.self_generated.type: basic
xpack.security.enabled: 'true'
xpack.security.enrollment.enabled: 'false'
xpack.security.http.ssl.enabled: 'false'
ports:
- ${ELASTICSEARCH_PORT:-9200}:9200
deploy:
resources:
limits:
memory: 2g
entrypoint: [ 'sh', '-c', "sh /docker-entrypoint-mount.sh" ]
healthcheck:
test: [ 'CMD', 'curl', '-s', 'http://localhost:9200/_cluster/health?pretty' ]
interval: 30s

View File

@@ -0,0 +1,25 @@
#!/bin/bash
# Entrypoint wrapper mounted into the Elasticsearch container.
#
# When VECTOR_STORE is "elasticsearch-ja", make sure the analysis-icu and
# analysis-kuromoji plugins are installed, then hand control over to the
# image's stock entrypoint under tini.
#
# Only POSIX shell constructs are used on purpose: the compose file starts
# this script with `sh`, so the shebang is not what selects the interpreter.
set -e

PLUGIN_CLI=/usr/share/elasticsearch/bin/elasticsearch-plugin

# Install a plugin unless the plugin CLI already lists it.
#   $1 - plugin id passed to `elasticsearch-plugin install`
#   $2 - human-readable name used in the log messages
# Terminates the whole script with status 1 when the installation fails.
ensure_plugin() {
    # The pipeline is an `if` condition, so a non-matching grep does not
    # trip `set -e`; it simply means the plugin still has to be installed.
    if "${PLUGIN_CLI}" list | grep -q -- "$1"; then
        return 0
    fi

    printf '%s\n' "Installing the $2 plugin"
    # --batch forces non-interactive mode so the install can never wait on a
    # confirmation prompt during container start-up.
    if ! "${PLUGIN_CLI}" install --batch "$1"; then
        printf '%s\n' "Failed to install the $2 plugin" >&2
        exit 1
    fi
}

if [ "${VECTOR_STORE}" = "elasticsearch-ja" ]; then
    ensure_plugin analysis-icu "ICU tokenizer"
    ensure_plugin analysis-kuromoji "Japanese language analyzer"
fi

# Run the original entrypoint script, forwarding any arguments given to this
# wrapper (none are passed by the current compose entrypoint).
exec /bin/tini -- /usr/local/bin/docker-entrypoint.sh "$@"