Harden web search and docs defaults
This commit is contained in:
103
bin/context-kit
103
bin/context-kit
@@ -1,7 +1,17 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
SCRIPT_PATH="${BASH_SOURCE[0]}"
|
||||
while [[ -L "${SCRIPT_PATH}" ]]; do
|
||||
SCRIPT_DIR="$(cd -P "$(dirname "${SCRIPT_PATH}")" && pwd)"
|
||||
SCRIPT_TARGET="$(readlink "${SCRIPT_PATH}")"
|
||||
if [[ "${SCRIPT_TARGET}" = /* ]]; then
|
||||
SCRIPT_PATH="${SCRIPT_TARGET}"
|
||||
else
|
||||
SCRIPT_PATH="${SCRIPT_DIR}/${SCRIPT_TARGET}"
|
||||
fi
|
||||
done
|
||||
ROOT="$(cd -P "$(dirname "${SCRIPT_PATH}")/.." && pwd)"
|
||||
ENV_FILE="${ROOT}/.env"
|
||||
|
||||
load_env_file() {
|
||||
@@ -39,10 +49,19 @@ NETWORK="${CONTEXT_KIT_DOCKER_NETWORK:-${PROJECT}_default}"
|
||||
SEARXNG_PORT="${CONTEXT_KIT_SEARXNG_PORT:-8099}"
|
||||
DOCS_PORT="${CONTEXT_KIT_DOCS_PORT:-8776}"
|
||||
DOCS_HTTP_URL="${CONTEXT_KIT_DOCS_HTTP_URL:-http://127.0.0.1:${DOCS_PORT}/mcp}"
|
||||
WEB_SEARCH_MAX_BYTES="${CONTEXT_KIT_WEB_SEARCH_MAX_BYTES:-52428800}"
|
||||
WEB_SEARCH_PROVIDER="${CONTEXT_KIT_WEB_SEARCH_PROVIDER:-${DEFAULT_SEARCH_PROVIDER:-searxng}}"
|
||||
WEB_SEARCH_HTTP_TIMEOUT="${CONTEXT_KIT_WEB_SEARCH_HTTP_TIMEOUT:-${HTTP_TIMEOUT:-15000}}"
|
||||
WEB_SEARCH_MAX_RESULTS="${CONTEXT_KIT_WEB_SEARCH_MAX_RESULTS:-${MAX_RESULTS:-10}}"
|
||||
WEB_SEARCH_CHROME_PATH="${CONTEXT_KIT_WEB_SEARCH_CHROME_PATH:-${CHROME_PATH:-/usr/bin/chromium}}"
|
||||
WEB_SEARCH_BROWSER_USER_AGENT="${CONTEXT_KIT_WEB_SEARCH_BROWSER_USER_AGENT:-${BROWSER_SEARCH_USER_AGENT:-}}"
|
||||
WEB_SEARCH_MCP_COMPAT_MODE="${CONTEXT_KIT_WEB_SEARCH_MCP_COMPAT_MODE:-${MCP_COMPAT_MODE:-}}"
|
||||
DOCS_CONTAINER_NAME="context-kit-docs-mcp"
|
||||
DOCS_SOURCES_FILE="${DATA_DIR}/docs-sources.txt"
|
||||
DOCS_DATA_DIR="${DATA_DIR}/docs"
|
||||
MODELS_DATA_DIR="${DATA_DIR}/models"
|
||||
DOCS_LOCAL_SOURCES_DIR="${CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR:-${DATA_DIR}/local-sources}"
|
||||
DOCS_LOCAL_SOURCES_PORT="${CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT:-8769}"
|
||||
|
||||
WEB_SEARCH_IMAGE="${CONTEXT_KIT_WEB_SEARCH_IMAGE:-context-kit/web-search-mcp:latest}"
|
||||
DOCS_IMAGE="${CONTEXT_KIT_DOCS_IMAGE:-context-kit/docs-mcp:latest}"
|
||||
@@ -86,6 +105,8 @@ compose() {
|
||||
CONTEXT_KIT_DOCS_MAX_GET_BYTES="${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}" \
|
||||
CONTEXT_KIT_DOCS_EMBED_MODEL="${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}" \
|
||||
CONTEXT_KIT_DOCS_PREINDEX="${CONTEXT_KIT_DOCS_PREINDEX:-0}" \
|
||||
CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR="${DOCS_LOCAL_SOURCES_DIR}" \
|
||||
CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT="${DOCS_LOCAL_SOURCES_PORT}" \
|
||||
BUILDX_BUILDER="${CONTEXT_KIT_BUILDX_BUILDER:-${BUILDX_BUILDER:-default}}" \
|
||||
docker compose -p "${PROJECT}" -f "${COMPOSE_FILE}" "$@"
|
||||
}
|
||||
@@ -112,11 +133,12 @@ prepare_data_dirs() {
|
||||
ensure_writable_dir "${DATA_DIR}"
|
||||
ensure_writable_dir "${DOCS_DATA_DIR}"
|
||||
ensure_writable_dir "${MODELS_DATA_DIR}"
|
||||
ensure_writable_dir "${DOCS_LOCAL_SOURCES_DIR}"
|
||||
}
|
||||
|
||||
check_data_dirs() {
|
||||
local ok=0 dir
|
||||
for dir in "${DATA_DIR}" "${DOCS_DATA_DIR}" "${MODELS_DATA_DIR}"; do
|
||||
for dir in "${DATA_DIR}" "${DOCS_DATA_DIR}" "${MODELS_DATA_DIR}" "${DOCS_LOCAL_SOURCES_DIR}"; do
|
||||
if [[ ! -d "${dir}" ]]; then
|
||||
printf 'warn data directory missing: %s (run context-kit start)\n' "${dir}"
|
||||
elif [[ -w "${dir}" && -x "${dir}" ]]; then
|
||||
@@ -129,6 +151,41 @@ check_data_dirs() {
|
||||
return "${ok}"
|
||||
}
|
||||
|
||||
#######################################
# Verify that the web-search image carries the fetch_url max-bytes schema
# patch and that the configured byte limit is usable by that schema.
# Globals:
#   WEB_SEARCH_MAX_BYTES (read) - configured download limit in bytes
#   WEB_SEARCH_IMAGE     (read) - image whose server.js is inspected
# Arguments: none
# Outputs:   nothing; all container output is discarded
# Returns:   0 when the limit is a positive integer and the patched schema
#            line is present in server.js; non-zero otherwise (this includes
#            docker or the image being unavailable).
#######################################
check_web_search_schema_patch() {
  # The patched schema is ".min(1).max(MAX_BYTES)", so a limit that is empty,
  # non-numeric or zero makes it unusable. Comparing the value with a second
  # copy of itself inside the container can never fail, so validate it here.
  local positive_int_re='^0*[1-9][0-9]*$'
  if [[ ! "${WEB_SEARCH_MAX_BYTES}" =~ ${positive_int_re} ]]; then
    return 1
  fi

  # The entrypoint is overridden so the image's node binary runs the inline
  # probe instead of starting the MCP server.
  docker run --rm --entrypoint node \
    -e MAX_BYTES="${WEB_SEARCH_MAX_BYTES}" \
    "${WEB_SEARCH_IMAGE}" \
    -e '
      const fs = require("node:fs");
      const limit = Number(process.env.MAX_BYTES);
      const serverPath = "/usr/local/lib/node_modules/@zhafron/mcp-web-search/dist/src/server.js";
      const source = fs.readFileSync(serverPath, "utf8");
      if (!Number.isInteger(limit) || limit < 1) process.exit(1);
      if (!source.includes("max_download_bytes: z.number().int().min(1).max(MAX_BYTES).optional()")) process.exit(1);
    ' >/dev/null 2>&1
}
|
||||
|
||||
#######################################
# Check that the Bing provider file inside the web-search image contains the
# Context Kit override markers.
# Globals:   WEB_SEARCH_IMAGE (read) - image whose bing.js is inspected
# Arguments: none
# Outputs:   nothing; all container output is discarded
# Returns:   exit status of the docker probe: 0 when all three marker strings
#            are found in bing.js, non-zero otherwise.
#######################################
check_web_search_bing_override() {
  # Inline Node probe: exits 1 as soon as one expected marker is absent.
  local probe_script='
      const fs = require("node:fs");
      const bingPath = "/usr/local/lib/node_modules/@zhafron/mcp-web-search/dist/src/providers/bing.js";
      const source = fs.readFileSync(bingPath, "utf8");
      if (!source.includes("Context Kit override for @zhafron/mcp-web-search 1.3.0")) process.exit(1);
      if (!source.includes("waitForSelector")) process.exit(1);
      if (!source.includes("decodeBingRedirect")) process.exit(1);
    '

  # Run the image's node binary directly rather than its normal entrypoint.
  docker run --rm --entrypoint node "${WEB_SEARCH_IMAGE}" -e "${probe_script}" \
    >/dev/null 2>&1
}
|
||||
|
||||
#######################################
# Check that the configured Chromium path is an executable file inside the
# web-search image.
# Globals:
#   WEB_SEARCH_IMAGE       (read) - image to probe
#   WEB_SEARCH_CHROME_PATH (read) - path tested with "test -x" in the image
# Arguments: none
# Outputs:   nothing; all container output is discarded
# Returns:   exit status of the docker probe.
#######################################
check_web_search_chrome() {
  # /usr/bin/test replaces the image entrypoint, so the container only
  # evaluates "-x <path>" and exits with that result.
  local -a probe_args=(
    run --rm
    --entrypoint /usr/bin/test
    "${WEB_SEARCH_IMAGE}"
    -x "${WEB_SEARCH_CHROME_PATH}"
  )
  docker "${probe_args[@]}" >/dev/null 2>&1
}
|
||||
|
||||
#######################################
# Print a warning line to stderr.
# Arguments: any number of words; they are joined into one message
# Outputs:   "warn: <message>" followed by a newline, on stderr
#######################################
warn() {
  local message="$*"
  printf 'warn: %s\n' "${message}" >&2
}
|
||||
@@ -257,9 +314,12 @@ cmd_status() {
|
||||
printf '\nImages\n'
|
||||
docker image ls --format '{{.Repository}}:{{.Tag}}\t{{.Size}}' \
|
||||
| grep -E '^(context-kit/|ghcr.io/yamadashy/repomix:)' || true
|
||||
printf '\nLabeled containers\n'
|
||||
docker ps -a --filter label=dev.context-kit=true --format 'table {{.Names}}\t{{.Status}}\t{{.Image}}'
|
||||
printf '\nDocs MCP endpoint\n- %s (container: %s)\n' "${DOCS_HTTP_URL}" "${DOCS_CONTAINER_NAME}"
|
||||
printf '\nDocs sources\n'
|
||||
resolved_sources | sed 's/^/- /'
|
||||
printf '\nLocal docs source directory\n- %s (served inside docs-mcp at http://127.0.0.1:%s/)\n' "${DOCS_LOCAL_SOURCES_DIR}" "${DOCS_LOCAL_SOURCES_PORT}"
|
||||
printf '\nData directory\n- %s\n' "${DATA_DIR}"
|
||||
}
|
||||
|
||||
@@ -303,6 +363,27 @@ cmd_doctor() {
|
||||
fi
|
||||
done
|
||||
|
||||
if docker image inspect "${WEB_SEARCH_IMAGE}" >/dev/null 2>&1; then
|
||||
if check_web_search_schema_patch; then
|
||||
printf 'pass web-search fetch_url max-bytes schema patch: %s\n' "${WEB_SEARCH_MAX_BYTES}"
|
||||
else
|
||||
printf 'fail web-search max-bytes schema patch missing; run: context-kit build\n'
|
||||
ok=1
|
||||
fi
|
||||
if check_web_search_bing_override; then
|
||||
printf 'pass web-search Bing provider override installed\n'
|
||||
else
|
||||
printf 'fail web-search Bing provider override missing; run: context-kit build\n'
|
||||
ok=1
|
||||
fi
|
||||
if check_web_search_chrome; then
|
||||
printf 'pass web-search Chromium path: %s\n' "${WEB_SEARCH_CHROME_PATH}"
|
||||
else
|
||||
printf 'fail web-search Chromium path unavailable: %s\n' "${WEB_SEARCH_CHROME_PATH}"
|
||||
ok=1
|
||||
fi
|
||||
fi
|
||||
|
||||
if command -v curl >/dev/null 2>&1 && curl -fsS "http://127.0.0.1:${SEARXNG_PORT}/healthz" >/dev/null 2>&1; then
|
||||
printf 'pass SearXNG responds on 127.0.0.1:%s\n' "${SEARXNG_PORT}"
|
||||
else
|
||||
@@ -331,11 +412,14 @@ cmd_web_search() {
|
||||
exec docker run --rm -i \
|
||||
--label dev.context-kit=true \
|
||||
--network "${NETWORK}" \
|
||||
-e DEFAULT_SEARCH_PROVIDER="${DEFAULT_SEARCH_PROVIDER:-searxng}" \
|
||||
-e DEFAULT_SEARCH_PROVIDER="${WEB_SEARCH_PROVIDER}" \
|
||||
-e SEARXNG_URL="${SEARXNG_URL:-http://searxng:8080}" \
|
||||
-e CHROME_PATH="${CHROME_PATH:-/usr/bin/chromium}" \
|
||||
-e HTTP_TIMEOUT="${HTTP_TIMEOUT:-15000}" \
|
||||
-e MAX_RESULTS="${MAX_RESULTS:-10}" \
|
||||
-e CHROME_PATH="${WEB_SEARCH_CHROME_PATH}" \
|
||||
-e HTTP_TIMEOUT="${WEB_SEARCH_HTTP_TIMEOUT}" \
|
||||
-e MAX_BYTES="${WEB_SEARCH_MAX_BYTES}" \
|
||||
-e MAX_RESULTS="${WEB_SEARCH_MAX_RESULTS}" \
|
||||
-e BROWSER_SEARCH_USER_AGENT="${WEB_SEARCH_BROWSER_USER_AGENT}" \
|
||||
-e MCP_COMPAT_MODE="${WEB_SEARCH_MCP_COMPAT_MODE}" \
|
||||
"${WEB_SEARCH_IMAGE}"
|
||||
}
|
||||
|
||||
@@ -397,12 +481,13 @@ print_opencode() {
|
||||
"type": "local",
|
||||
"command": ["${bin}", "web-search"],
|
||||
"enabled": true,
|
||||
"timeout": 60000
|
||||
"timeout": 150000
|
||||
},
|
||||
"context-docs": {
|
||||
"type": "remote",
|
||||
"url": "${url}",
|
||||
"enabled": true
|
||||
"enabled": true,
|
||||
"timeout": 150000
|
||||
},
|
||||
"context-repomix": {
|
||||
"type": "local",
|
||||
@@ -451,7 +536,7 @@ cmd_install() {
|
||||
|
||||
cmd_redaction_check() {
|
||||
local bad=0
|
||||
local local_path_terms='/(home|Users)/[^/[:space:]]+|[A-Za-z]:\\Users\\[^\\[:space:]]+'
|
||||
local local_path_terms='/(home|Users)/[^/[:space:]]+|/data/(projects|opencode-mcp)[^[:space:]]*|[A-Za-z]:\\Users\\[^\\[:space:]]+'
|
||||
local secret_terms='AKIA[0-9A-Z]{16}|BEGIN (RSA |OPENSSH |EC |DSA )?PRIVATE KEY|xox[baprs]-|sk-[A-Za-z0-9_-]{20,}|ghp_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}|glpat-[A-Za-z0-9_-]{20,}|gitea_[A-Za-z0-9_-]{20,}'
|
||||
|
||||
# Scan only what would be published: skip .git plus everything .gitignore
|
||||
|
||||
Reference in New Issue
Block a user