From 8237f1331c4574bd6f352481a08b72fd6646939f Mon Sep 17 00:00:00 2001 From: Ajay Krishnan Date: Wed, 24 Jun 2026 23:57:44 -0700 Subject: [PATCH] Harden web search and docs defaults --- .env.example | 21 ++- README.md | 11 +- bin/context-kit | 103 ++++++++++++-- compose.yml | 16 ++- config/sources.default.txt | 1 - docker/docs/.dockerignore | 1 + docker/docs/Dockerfile | 15 +- docker/docs/constraints.txt | 107 +++++++++++++++ docker/docs/entrypoint.sh | 38 +++++- docker/web-search/.dockerignore | 3 + docker/web-search/Dockerfile | 13 +- docker/web-search/overrides/bing.js | 114 ++++++++++++++++ docker/web-search/patch-mcp-web-search.mjs | 28 ++++ docker/web-search/searxng/settings.yml | 4 +- docs/configuration.md | 14 ++ docs/troubleshooting.md | 31 +++++ scripts/release-check | 49 +++++++ scripts/smoke-web-search.mjs | 152 +++++++++++++++++++++ snippets/opencode.json | 5 +- 19 files changed, 691 insertions(+), 35 deletions(-) create mode 100644 docker/docs/constraints.txt create mode 100644 docker/web-search/overrides/bing.js create mode 100644 docker/web-search/patch-mcp-web-search.mjs create mode 100755 scripts/release-check create mode 100644 scripts/smoke-web-search.mjs diff --git a/.env.example b/.env.example index 4a88880..db8751a 100644 --- a/.env.example +++ b/.env.example @@ -10,9 +10,19 @@ CONTEXT_KIT_COMPOSE_PROJECT=context-kit # Local SearXNG port. Bound to 127.0.0.1 only. CONTEXT_KIT_SEARXNG_PORT=8099 -# Local-only SearXNG secret. Set this to any random string if you expose SearXNG -# beyond localhost, which the default setup does not do. -CONTEXT_KIT_SEARXNG_SECRET=change-me-local-only +# Max bytes accepted and downloaded by context-web-search fetch_url. +# Keep this aligned with agent tool-call defaults to avoid schema rejections. +CONTEXT_KIT_WEB_SEARCH_MAX_BYTES=52428800 + +# Web-search defaults. Search uses SearXNG first, then falls back to +# DuckDuckGo and Bing. Bing requires Chromium inside the web-search image. 
+CONTEXT_KIT_WEB_SEARCH_PROVIDER=searxng +CONTEXT_KIT_WEB_SEARCH_HTTP_TIMEOUT=15000 +CONTEXT_KIT_WEB_SEARCH_MAX_RESULTS=10 +CONTEXT_KIT_WEB_SEARCH_CHROME_PATH=/usr/bin/chromium +# User agent used by the Chromium-backed Bing search fallback. +# CONTEXT_KIT_WEB_SEARCH_BROWSER_USER_AGENT="Mozilla/5.0 ..." +# CONTEXT_KIT_WEB_SEARCH_MCP_COMPAT_MODE=legacy # Long-lived context-docs HTTP MCP service. Bound to 127.0.0.1 only. CONTEXT_KIT_DOCS_PORT=8776 @@ -33,3 +43,8 @@ CONTEXT_KIT_DOCS_EMBED_MODEL=BAAI/bge-small-en-v1.5 # One or more source files, separated by spaces. CONTEXT_KIT_DOCS_SOURCES=config/sources.default.txt + +# Optional machine-local llms.txt tree. Files are served only inside docs-mcp at +# http://127.0.0.1:8769/ so absolute local paths do not leak into source files. +# CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR=/path/to/context-kit-local-sources +# CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT=8769 diff --git a/README.md b/README.md index 865d2a0..15dbe76 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Context Kit gives coding agents three local MCP servers: | Server | Purpose | Default | |---|---|---| -| `context-web-search` | Current web search and URL fetch through local SearXNG | Enabled | +| `context-web-search` | Current web search through local SearXNG plus URL fetch/extract | Enabled | | `context-docs` | Semantic search over curated `llms.txt` documentation | Enabled | | `context-repomix` | Pack local or remote repositories into AI-friendly context | Enabled | @@ -56,6 +56,10 @@ config that will not be committed. ## Defaults - SearXNG binds to `127.0.0.1:8099` only. +- `context-web-search` defaults `search_web` to SearXNG, then falls back to + DuckDuckGo and Bing. Bing uses Chromium inside the web-search image. +- `fetch_url` uses upstream HTTP extraction. In `mcp-web-search` 1.3.0, + `engine=browser` is accepted but does not invoke Chromium yet. 
- `context-docs` runs as a long-lived service on `127.0.0.1:8776` (Streamable HTTP MCP) so every client shares one indexer and one Chroma writer. The `bin/context-kit docs` stdio command is kept as a compatibility shim for @@ -74,7 +78,6 @@ The default docs index is intentionally small: - Claude Code docs - OpenAI API docs and reference -- Anthropic docs - OpenRouter docs - Model Context Protocol docs @@ -91,8 +94,8 @@ CONTEXT_KIT_DOCS_SOURCES="config/sources.default.txt config/sources.js.txt" \ bin/context-kit docs ``` -Cloudflare is opt-in because it can expand to thousands of sections and take a -while to embed. +Large vendor feeds are opt-in because they can expand to thousands of sections +and take a while to embed. ## Commands diff --git a/bin/context-kit b/bin/context-kit index d85f507..2f23532 100755 --- a/bin/context-kit +++ b/bin/context-kit @@ -1,7 +1,17 @@ #!/usr/bin/env bash set -euo pipefail -ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +SCRIPT_PATH="${BASH_SOURCE[0]}" +while [[ -L "${SCRIPT_PATH}" ]]; do + SCRIPT_DIR="$(cd -P "$(dirname "${SCRIPT_PATH}")" && pwd)" + SCRIPT_TARGET="$(readlink "${SCRIPT_PATH}")" + if [[ "${SCRIPT_TARGET}" = /* ]]; then + SCRIPT_PATH="${SCRIPT_TARGET}" + else + SCRIPT_PATH="${SCRIPT_DIR}/${SCRIPT_TARGET}" + fi +done +ROOT="$(cd -P "$(dirname "${SCRIPT_PATH}")/.." 
&& pwd)" ENV_FILE="${ROOT}/.env" load_env_file() { @@ -39,10 +49,19 @@ NETWORK="${CONTEXT_KIT_DOCKER_NETWORK:-${PROJECT}_default}" SEARXNG_PORT="${CONTEXT_KIT_SEARXNG_PORT:-8099}" DOCS_PORT="${CONTEXT_KIT_DOCS_PORT:-8776}" DOCS_HTTP_URL="${CONTEXT_KIT_DOCS_HTTP_URL:-http://127.0.0.1:${DOCS_PORT}/mcp}" +WEB_SEARCH_MAX_BYTES="${CONTEXT_KIT_WEB_SEARCH_MAX_BYTES:-52428800}" +WEB_SEARCH_PROVIDER="${CONTEXT_KIT_WEB_SEARCH_PROVIDER:-${DEFAULT_SEARCH_PROVIDER:-searxng}}" +WEB_SEARCH_HTTP_TIMEOUT="${CONTEXT_KIT_WEB_SEARCH_HTTP_TIMEOUT:-${HTTP_TIMEOUT:-15000}}" +WEB_SEARCH_MAX_RESULTS="${CONTEXT_KIT_WEB_SEARCH_MAX_RESULTS:-${MAX_RESULTS:-10}}" +WEB_SEARCH_CHROME_PATH="${CONTEXT_KIT_WEB_SEARCH_CHROME_PATH:-${CHROME_PATH:-/usr/bin/chromium}}" +WEB_SEARCH_BROWSER_USER_AGENT="${CONTEXT_KIT_WEB_SEARCH_BROWSER_USER_AGENT:-${BROWSER_SEARCH_USER_AGENT:-}}" +WEB_SEARCH_MCP_COMPAT_MODE="${CONTEXT_KIT_WEB_SEARCH_MCP_COMPAT_MODE:-${MCP_COMPAT_MODE:-}}" DOCS_CONTAINER_NAME="context-kit-docs-mcp" DOCS_SOURCES_FILE="${DATA_DIR}/docs-sources.txt" DOCS_DATA_DIR="${DATA_DIR}/docs" MODELS_DATA_DIR="${DATA_DIR}/models" +DOCS_LOCAL_SOURCES_DIR="${CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR:-${DATA_DIR}/local-sources}" +DOCS_LOCAL_SOURCES_PORT="${CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT:-8769}" WEB_SEARCH_IMAGE="${CONTEXT_KIT_WEB_SEARCH_IMAGE:-context-kit/web-search-mcp:latest}" DOCS_IMAGE="${CONTEXT_KIT_DOCS_IMAGE:-context-kit/docs-mcp:latest}" @@ -86,6 +105,8 @@ compose() { CONTEXT_KIT_DOCS_MAX_GET_BYTES="${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}" \ CONTEXT_KIT_DOCS_EMBED_MODEL="${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}" \ CONTEXT_KIT_DOCS_PREINDEX="${CONTEXT_KIT_DOCS_PREINDEX:-0}" \ + CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR="${DOCS_LOCAL_SOURCES_DIR}" \ + CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT="${DOCS_LOCAL_SOURCES_PORT}" \ BUILDX_BUILDER="${CONTEXT_KIT_BUILDX_BUILDER:-${BUILDX_BUILDER:-default}}" \ docker compose -p "${PROJECT}" -f "${COMPOSE_FILE}" "$@" } @@ -112,11 +133,12 @@ prepare_data_dirs() { 
ensure_writable_dir "${DATA_DIR}" ensure_writable_dir "${DOCS_DATA_DIR}" ensure_writable_dir "${MODELS_DATA_DIR}" + ensure_writable_dir "${DOCS_LOCAL_SOURCES_DIR}" } check_data_dirs() { local ok=0 dir - for dir in "${DATA_DIR}" "${DOCS_DATA_DIR}" "${MODELS_DATA_DIR}"; do + for dir in "${DATA_DIR}" "${DOCS_DATA_DIR}" "${MODELS_DATA_DIR}" "${DOCS_LOCAL_SOURCES_DIR}"; do if [[ ! -d "${dir}" ]]; then printf 'warn data directory missing: %s (run context-kit start)\n' "${dir}" elif [[ -w "${dir}" && -x "${dir}" ]]; then @@ -129,6 +151,41 @@ check_data_dirs() { return "${ok}" } +check_web_search_schema_patch() { + docker run --rm --entrypoint node \ + -e MAX_BYTES="${WEB_SEARCH_MAX_BYTES}" \ + -e EXPECTED_MAX_BYTES="${WEB_SEARCH_MAX_BYTES}" \ + "${WEB_SEARCH_IMAGE}" \ + -e ' +const fs = require("node:fs"); +const expected = Number(process.env.EXPECTED_MAX_BYTES) || 0; +const actual = Number(process.env.MAX_BYTES) || 0; +const serverPath = "/usr/local/lib/node_modules/@zhafron/mcp-web-search/dist/src/server.js"; +const source = fs.readFileSync(serverPath, "utf8"); +if (actual !== expected) process.exit(1); +if (!source.includes("max_download_bytes: z.number().int().min(1).max(MAX_BYTES).optional()")) process.exit(1); +' >/dev/null 2>&1 +} + +check_web_search_bing_override() { + docker run --rm --entrypoint node \ + "${WEB_SEARCH_IMAGE}" \ + -e ' +const fs = require("node:fs"); +const bingPath = "/usr/local/lib/node_modules/@zhafron/mcp-web-search/dist/src/providers/bing.js"; +const source = fs.readFileSync(bingPath, "utf8"); +if (!source.includes("Context Kit override for @zhafron/mcp-web-search 1.3.0")) process.exit(1); +if (!source.includes("waitForSelector")) process.exit(1); +if (!source.includes("decodeBingRedirect")) process.exit(1); +' >/dev/null 2>&1 +} + +check_web_search_chrome() { + docker run --rm --entrypoint /usr/bin/test \ + "${WEB_SEARCH_IMAGE}" \ + -x "${WEB_SEARCH_CHROME_PATH}" >/dev/null 2>&1 +} + warn() { printf 'warn: %s\n' "$*" >&2 } @@ -257,9 +314,12 
@@ cmd_status() { printf '\nImages\n' docker image ls --format '{{.Repository}}:{{.Tag}}\t{{.Size}}' \ | grep -E '^(context-kit/|ghcr.io/yamadashy/repomix:)' || true + printf '\nLabeled containers\n' + docker ps -a --filter label=dev.context-kit=true --format 'table {{.Names}}\t{{.Status}}\t{{.Image}}' printf '\nDocs MCP endpoint\n- %s (container: %s)\n' "${DOCS_HTTP_URL}" "${DOCS_CONTAINER_NAME}" printf '\nDocs sources\n' resolved_sources | sed 's/^/- /' + printf '\nLocal docs source directory\n- %s (served inside docs-mcp at http://127.0.0.1:%s/)\n' "${DOCS_LOCAL_SOURCES_DIR}" "${DOCS_LOCAL_SOURCES_PORT}" printf '\nData directory\n- %s\n' "${DATA_DIR}" } @@ -303,6 +363,27 @@ cmd_doctor() { fi done + if docker image inspect "${WEB_SEARCH_IMAGE}" >/dev/null 2>&1; then + if check_web_search_schema_patch; then + printf 'pass web-search fetch_url max-bytes schema patch: %s\n' "${WEB_SEARCH_MAX_BYTES}" + else + printf 'fail web-search max-bytes schema patch missing; run: context-kit build\n' + ok=1 + fi + if check_web_search_bing_override; then + printf 'pass web-search Bing provider override installed\n' + else + printf 'fail web-search Bing provider override missing; run: context-kit build\n' + ok=1 + fi + if check_web_search_chrome; then + printf 'pass web-search Chromium path: %s\n' "${WEB_SEARCH_CHROME_PATH}" + else + printf 'fail web-search Chromium path unavailable: %s\n' "${WEB_SEARCH_CHROME_PATH}" + ok=1 + fi + fi + if command -v curl >/dev/null 2>&1 && curl -fsS "http://127.0.0.1:${SEARXNG_PORT}/healthz" >/dev/null 2>&1; then printf 'pass SearXNG responds on 127.0.0.1:%s\n' "${SEARXNG_PORT}" else @@ -331,11 +412,14 @@ cmd_web_search() { exec docker run --rm -i \ --label dev.context-kit=true \ --network "${NETWORK}" \ - -e DEFAULT_SEARCH_PROVIDER="${DEFAULT_SEARCH_PROVIDER:-searxng}" \ + -e DEFAULT_SEARCH_PROVIDER="${WEB_SEARCH_PROVIDER}" \ -e SEARXNG_URL="${SEARXNG_URL:-http://searxng:8080}" \ - -e CHROME_PATH="${CHROME_PATH:-/usr/bin/chromium}" \ - -e 
HTTP_TIMEOUT="${HTTP_TIMEOUT:-15000}" \ - -e MAX_RESULTS="${MAX_RESULTS:-10}" \ + -e CHROME_PATH="${WEB_SEARCH_CHROME_PATH}" \ + -e HTTP_TIMEOUT="${WEB_SEARCH_HTTP_TIMEOUT}" \ + -e MAX_BYTES="${WEB_SEARCH_MAX_BYTES}" \ + -e MAX_RESULTS="${WEB_SEARCH_MAX_RESULTS}" \ + -e BROWSER_SEARCH_USER_AGENT="${WEB_SEARCH_BROWSER_USER_AGENT}" \ + -e MCP_COMPAT_MODE="${WEB_SEARCH_MCP_COMPAT_MODE}" \ "${WEB_SEARCH_IMAGE}" } @@ -397,12 +481,13 @@ print_opencode() { "type": "local", "command": ["${bin}", "web-search"], "enabled": true, - "timeout": 60000 + "timeout": 150000 }, "context-docs": { "type": "remote", "url": "${url}", - "enabled": true + "enabled": true, + "timeout": 150000 }, "context-repomix": { "type": "local", @@ -451,7 +536,7 @@ cmd_install() { cmd_redaction_check() { local bad=0 - local local_path_terms='/(home|Users)/[^/[:space:]]+|[A-Za-z]:\\Users\\[^\\[:space:]]+' + local local_path_terms='/(home|Users)/[^/[:space:]]+|/data/(projects|opencode-mcp)[^[:space:]]*|[A-Za-z]:\\Users\\[^\\[:space:]]+' local secret_terms='AKIA[0-9A-Z]{16}|BEGIN (RSA |OPENSSH |EC |DSA )?PRIVATE KEY|xox[baprs]-|sk-[A-Za-z0-9_-]{20,}|ghp_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}|glpat-[A-Za-z0-9_-]{20,}|gitea_[A-Za-z0-9_-]{20,}' # Scan only what would be published: skip .git plus everything .gitignore diff --git a/compose.yml b/compose.yml index c14c644..2afa332 100644 --- a/compose.yml +++ b/compose.yml @@ -9,7 +9,6 @@ services: environment: BASE_URL: "http://127.0.0.1:${CONTEXT_KIT_SEARXNG_PORT:-8099}/" INSTANCE_NAME: "context-kit-search" - SEARXNG_SECRET: "${CONTEXT_KIT_SEARXNG_SECRET:-change-me-local-only}" volumes: - ./docker/web-search/searxng/settings.yml:/etc/searxng/settings.yml:ro - searxng-cache:/var/cache/searxng @@ -19,16 +18,21 @@ services: web-search-mcp: build: context: ./docker/web-search + args: + MCP_WEB_SEARCH_MAX_BYTES: "${CONTEXT_KIT_WEB_SEARCH_MAX_BYTES:-52428800}" image: context-kit/web-search-mcp:latest profiles: ["mcp"] stdin_open: true tty: false 
environment: - DEFAULT_SEARCH_PROVIDER: "searxng" + DEFAULT_SEARCH_PROVIDER: "${CONTEXT_KIT_WEB_SEARCH_PROVIDER:-searxng}" SEARXNG_URL: "http://searxng:8080" - CHROME_PATH: "/usr/bin/chromium" - HTTP_TIMEOUT: "15000" - MAX_RESULTS: "10" + CHROME_PATH: "${CONTEXT_KIT_WEB_SEARCH_CHROME_PATH:-/usr/bin/chromium}" + HTTP_TIMEOUT: "${CONTEXT_KIT_WEB_SEARCH_HTTP_TIMEOUT:-15000}" + MAX_BYTES: "${CONTEXT_KIT_WEB_SEARCH_MAX_BYTES:-52428800}" + MAX_RESULTS: "${CONTEXT_KIT_WEB_SEARCH_MAX_RESULTS:-10}" + BROWSER_SEARCH_USER_AGENT: "${CONTEXT_KIT_WEB_SEARCH_BROWSER_USER_AGENT:-}" + MCP_COMPAT_MODE: "${CONTEXT_KIT_WEB_SEARCH_MCP_COMPAT_MODE:-}" labels: dev.context-kit: "true" @@ -53,6 +57,7 @@ services: DOCS_MCP_MAX_GET_BYTES: "${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}" DOCS_MCP_EMBED_MODEL: "${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}" DOCS_MCP_ALLOW_ORIGIN: "${CONTEXT_KIT_DOCS_ALLOW_ORIGIN:-}" + DOCS_MCP_LOCAL_SOURCES_PORT: "${CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT:-8769}" # Preindex on startup is off by default; use the docs_refresh tool to # refresh on demand. Set CONTEXT_KIT_DOCS_PREINDEX=1 to restore eager. 
DOCS_MCP_PREINDEX: "${CONTEXT_KIT_DOCS_PREINDEX:-0}" @@ -60,6 +65,7 @@ services: - ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs:/data - ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/models:/models - ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs-sources.txt:/etc/context-kit/docs-sources.txt:ro + - ${CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR:-${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/local-sources}:/etc/context-kit/local-sources:ro healthcheck: test: ["CMD-SHELL", "python -c \"import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://127.0.0.1:8000/status', timeout=2).status < 500 else 1)\""] interval: 30s diff --git a/config/sources.default.txt b/config/sources.default.txt index 6ee3ad4..8926aae 100644 --- a/config/sources.default.txt +++ b/config/sources.default.txt @@ -4,6 +4,5 @@ https://code.claude.com/docs/llms.txt https://developers.openai.com/api/docs/llms.txt https://developers.openai.com/api/reference/llms.txt -https://docs.anthropic.com/llms.txt https://openrouter.ai/docs/llms.txt https://modelcontextprotocol.io/llms-full.txt diff --git a/docker/docs/.dockerignore b/docker/docs/.dockerignore index b8d7fbb..5142853 100644 --- a/docker/docs/.dockerignore +++ b/docker/docs/.dockerignore @@ -1,3 +1,4 @@ * !Dockerfile !entrypoint.sh +!constraints.txt diff --git a/docker/docs/Dockerfile b/docker/docs/Dockerfile index cb8e1ca..74d59a4 100644 --- a/docker/docs/Dockerfile +++ b/docker/docs/Dockerfile @@ -1,7 +1,10 @@ -FROM python:3.12-slim +FROM python:3.12-slim@sha256:6c4dd321d176d61ea848dc8c73a4f7dbae8f70e0ee48bb411ea2f045b599fa8e ARG LLMS_TXT_MCP_VERSION=0.2.0 ARG MCP_PROXY_VERSION=0.12.0 +ARG TORCH_VERSION=2.12.1+cpu + +COPY constraints.txt /tmp/context-kit-docs-constraints.txt RUN apt-get update \ && apt-get install -y --no-install-recommends \ @@ -11,17 +14,19 @@ RUN apt-get update \ # Install CPU-only torch first so llms-txt-mcp does not pull large CUDA wheels. 
RUN pip install --no-cache-dir \ --index-url https://download.pytorch.org/whl/cpu \ - torch + -c /tmp/context-kit-docs-constraints.txt \ + "torch==${TORCH_VERSION}" # llms-txt-mcp does the indexing/search; mcp-proxy fronts its stdio transport # as Streamable HTTP so multiple MCP clients can share one long-lived process # (and therefore one Chroma DB writer). RUN if [ -n "${LLMS_TXT_MCP_VERSION}" ]; then \ - pip install --no-cache-dir "llms-txt-mcp==${LLMS_TXT_MCP_VERSION}"; \ + pip install --no-cache-dir -c /tmp/context-kit-docs-constraints.txt "llms-txt-mcp==${LLMS_TXT_MCP_VERSION}"; \ else \ - pip install --no-cache-dir llms-txt-mcp; \ + pip install --no-cache-dir -c /tmp/context-kit-docs-constraints.txt llms-txt-mcp; \ fi \ - && pip install --no-cache-dir "mcp-proxy==${MCP_PROXY_VERSION}" + && pip install --no-cache-dir -c /tmp/context-kit-docs-constraints.txt "mcp-proxy==${MCP_PROXY_VERSION}" \ + && rm /tmp/context-kit-docs-constraints.txt COPY entrypoint.sh /usr/local/bin/docs-mcp-entrypoint RUN chmod +x /usr/local/bin/docs-mcp-entrypoint diff --git a/docker/docs/constraints.txt b/docker/docs/constraints.txt new file mode 100644 index 0000000..9ac9bec --- /dev/null +++ b/docker/docs/constraints.txt @@ -0,0 +1,107 @@ +aiohappyeyeballs==2.6.2 +aiohttp==3.14.1 +aiosignal==1.4.0 +annotated-doc==0.0.4 +annotated-types==0.7.0 +anyio==4.14.1 +attrs==26.1.0 +bcrypt==5.0.0 +build==1.5.0 +certifi==2026.6.17 +cffi==2.0.0 +charset-normalizer==3.4.7 +chromadb==1.5.9 +click==8.4.2 +cryptography==49.0.0 +durationpy==0.10 +filelock==3.29.0 +flatbuffers==25.12.19 +frozenlist==1.8.0 +fsspec==2026.4.0 +googleapis-common-protos==1.75.0 +grpcio==1.81.1 +h11==0.16.0 +hf-xet==1.5.1 +httpcore==1.0.9 +httptools==0.8.0 +httpx==0.28.1 +httpx-sse==0.4.3 +httpx_auth==0.23.1 +huggingface_hub==1.20.1 +idna==3.18 +importlib_resources==7.1.0 +Jinja2==3.1.6 +joblib==1.5.3 +jsonschema==4.26.0 +jsonschema-specifications==2025.9.1 +kubernetes==36.0.2 +llms-txt-mcp==0.2.0 +markdown-it-py==4.2.0 
+MarkupSafe==3.0.3 +mcp==1.28.0 +mcp-proxy==0.12.0 +mdurl==0.1.2 +mmh3==5.2.1 +mpmath==1.3.0 +multidict==6.7.1 +narwhals==2.22.1 +networkx==3.6.1 +numpy==2.5.0 +oauthlib==3.3.1 +onnxruntime==1.27.0 +opentelemetry-api==1.43.0 +opentelemetry-exporter-otlp-proto-common==1.43.0 +opentelemetry-exporter-otlp-proto-grpc==1.43.0 +opentelemetry-proto==1.43.0 +opentelemetry-sdk==1.43.0 +opentelemetry-semantic-conventions==0.64b0 +orjson==3.11.9 +overrides==7.7.0 +packaging==26.2 +propcache==0.5.2 +protobuf==7.35.1 +pybase64==1.4.3 +pycparser==3.0 +pydantic==2.13.4 +pydantic-settings==2.14.2 +pydantic_core==2.46.4 +Pygments==2.20.0 +PyJWT==2.13.0 +PyPika==0.51.1 +pyproject_hooks==1.2.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.2.2 +python-multipart==0.0.32 +PyYAML==6.0.3 +referencing==0.37.0 +regex==2026.5.9 +requests==2.34.2 +requests-oauthlib==2.0.0 +rich==15.0.0 +rpds-py==2026.5.1 +safetensors==0.8.0 +scikit-learn==1.9.0 +scipy==1.18.0 +sentence-transformers==5.6.0 +setuptools==70.2.0 +shellingham==1.5.4 +six==1.17.0 +sse-starlette==3.4.5 +starlette==1.3.1 +sympy==1.14.0 +tenacity==9.1.4 +threadpoolctl==3.6.0 +tokenizers==0.22.2 +torch==2.12.1+cpu +tqdm==4.68.3 +transformers==5.12.1 +typer==0.25.1 +typing-inspection==0.4.2 +typing_extensions==4.15.0 +urllib3==2.7.0 +uvicorn==0.49.0 +uvloop==0.22.1 +watchfiles==1.2.0 +websocket-client==1.9.0 +websockets==16.0 +yarl==1.24.2 diff --git a/docker/docs/entrypoint.sh b/docker/docs/entrypoint.sh index c2cfbc7..4cba799 100644 --- a/docker/docs/entrypoint.sh +++ b/docker/docs/entrypoint.sh @@ -12,6 +12,8 @@ set -eu sources_file="${DOCS_MCP_SOURCES_FILE:-/etc/context-kit/docs-sources.txt}" +local_sources_dir="${DOCS_MCP_LOCAL_SOURCES_DIR:-/etc/context-kit/local-sources}" +local_sources_port="${DOCS_MCP_LOCAL_SOURCES_PORT:-8769}" if [ ! 
-r "$sources_file" ]; then echo "docs-mcp: sources file not readable: $sources_file" >&2 @@ -27,11 +29,41 @@ if [ -z "$sources" ]; then exit 64 fi +if [ -d "$local_sources_dir" ]; then + python -m http.server "$local_sources_port" \ + --bind 127.0.0.1 \ + --directory "$local_sources_dir" \ + >/tmp/context-kit-local-sources.log 2>&1 & + local_sources_pid="$!" + if ! python - "$local_sources_port" <<'PY' +import sys +import time +import urllib.request + +port = sys.argv[1] +last_error = None +for _ in range(20): + try: + with urllib.request.urlopen(f"http://127.0.0.1:{port}/", timeout=0.5) as response: + if response.status < 500: + raise SystemExit(0) + except Exception as error: + last_error = error + time.sleep(0.1) +raise SystemExit(f"local source server did not become ready: {last_error}") +PY + then + kill "$local_sources_pid" 2>/dev/null || true + echo "docs-mcp: local source server failed on 127.0.0.1:$local_sources_port" >&2 + exit 65 + fi +fi + # By default llms-txt-mcp 0.2.0 re-embeds every source on launch (the actual # default is a background preindex, --no-preindex only disables the foreground -# variant). On a long-lived container that just wastes ~5 min of CPU per -# restart, so we disable BOTH and let the caller use `docs_refresh` on demand. -# Set DOCS_MCP_PREINDEX=1 to restore the eager behavior. +# variant). On a long-lived container that wastes CPU per restart, so we disable +# BOTH. Missing/stale sources still refresh on first docs_query/docs_refresh. +# Set DOCS_MCP_PREINDEX=1 to restore eager startup indexing. 
preindex_flag="--no-preindex --no-background-preindex" if [ "${DOCS_MCP_PREINDEX:-0}" = "1" ]; then preindex_flag="" diff --git a/docker/web-search/.dockerignore b/docker/web-search/.dockerignore index 5d0f124..77333f4 100644 --- a/docker/web-search/.dockerignore +++ b/docker/web-search/.dockerignore @@ -1,2 +1,5 @@ * !Dockerfile +!patch-mcp-web-search.mjs +!overrides/ +!overrides/bing.js diff --git a/docker/web-search/Dockerfile b/docker/web-search/Dockerfile index cb0f112..819cb00 100644 --- a/docker/web-search/Dockerfile +++ b/docker/web-search/Dockerfile @@ -1,7 +1,14 @@ -FROM node:22-bookworm-slim +FROM node:22-bookworm-slim@sha256:813a7480f28fdadac1f7f5c824bcdad435b5bc1322a5968bbbdef8d058f9dff4 ARG MCP_WEB_SEARCH_VERSION=1.3.0 +ARG MCP_WEB_SEARCH_MAX_BYTES=52428800 +COPY patch-mcp-web-search.mjs /tmp/patch-mcp-web-search.mjs +COPY overrides/bing.js /tmp/context-kit-bing-provider.js + +# Chromium intentionally tracks Debian security updates inside the pinned base +# image family; Bing's browser path is more likely to break with stale Chromium +# than with patched OS packages. 
RUN apt-get update \ && apt-get install -y --no-install-recommends \ ca-certificates \ @@ -10,11 +17,15 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* RUN npm install -g "@zhafron/mcp-web-search@${MCP_WEB_SEARCH_VERSION}" \ + && cp /tmp/context-kit-bing-provider.js /usr/local/lib/node_modules/@zhafron/mcp-web-search/dist/src/providers/bing.js \ + && node /tmp/patch-mcp-web-search.mjs \ + && rm /tmp/patch-mcp-web-search.mjs /tmp/context-kit-bing-provider.js \ && npm cache clean --force ENV CHROME_PATH=/usr/bin/chromium \ DEFAULT_SEARCH_PROVIDER=searxng \ HTTP_TIMEOUT=15000 \ + MAX_BYTES=${MCP_WEB_SEARCH_MAX_BYTES} \ MAX_RESULTS=10 \ SEARXNG_URL=http://searxng:8080 diff --git a/docker/web-search/overrides/bing.js b/docker/web-search/overrides/bing.js new file mode 100644 index 0000000..5c66901 --- /dev/null +++ b/docker/web-search/overrides/bing.js @@ -0,0 +1,114 @@ +import { PUPPETEER_TIMEOUT } from "../constants.js"; +import { browserPool } from "../utils/browser-pool.js"; +import { getAcceptLanguageHeader, getMarketFromLang } from "../utils/user-agent.js"; +import { searchCache, createCacheKey } from "../utils/cache.js"; + +// Context Kit override for @zhafron/mcp-web-search 1.3.0. +// The upstream provider can read Bing before result cards render and return an +// empty fallback. Keep this as a direct provider replacement until upstream +// waits for cards and decodes current /ck/a redirects reliably. 
+const DEFAULT_BROWSER_SEARCH_USER_AGENT = process.env.BROWSER_SEARCH_USER_AGENT || + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"; + +function decodeBase64Url(value) { + const normalized = value.replace(/-/g, "+").replace(/_/g, "/"); + const padded = normalized.padEnd(normalized.length + ((4 - normalized.length % 4) % 4), "="); + return Buffer.from(padded, "base64").toString("utf-8"); +} + +export class BingProvider { + name = "bing"; + + decodeBingRedirect(href) { + try { + const url = new URL(href, "https://www.bing.com/"); + if (url.hostname === "www.bing.com" && url.pathname === "/ck/a") { + const encoded = url.searchParams.get("u"); + if (encoded) { + const candidates = [encoded]; + if (/^[a-z][0-9]/i.test(encoded)) candidates.push(encoded.slice(2)); + for (const candidate of candidates) { + try { + const decoded = decodeBase64Url(candidate); + if (/^https?:\/\//i.test(decoded)) return decoded; + } + catch { } + } + } + } + return url.toString(); + } + catch { + return href; + } + } + + async search(q, limit, lang) { + const cacheKey = createCacheKey("bing", q, limit, lang); + const cached = searchCache.get(cacheKey); + if (cached) + return cached; + const market = getMarketFromLang(lang); + const results = await browserPool.withBrowser(async (browser) => { + const page = await browser.newPage(); + try { + await page.setViewport({ width: 1365, height: 768 }); + await page.setUserAgent(DEFAULT_BROWSER_SEARCH_USER_AGENT); + await page.setExtraHTTPHeaders(getAcceptLanguageHeader(lang)); + const url = new URL("https://www.bing.com/search"); + url.searchParams.set("q", q); + url.searchParams.set("mkt", market); + const response = await page.goto(url.toString(), { + waitUntil: "domcontentloaded", + timeout: PUPPETEER_TIMEOUT + }); + if (response && response.status() >= 400) { + throw new Error(`Bing HTTP ${response.status()}`); + } + await page.waitForSelector("li.b_algo h2 a[href], li.b_algo a[href]", { 
timeout: 10000 }).catch(() => undefined); + const items = await page.evaluate(maxResults => { + const parsed = []; + for (const card of Array.from(document.querySelectorAll("li.b_algo"))) { + const anchor = card.querySelector("h2 a[href]") || card.querySelector("a[href]"); + const title = anchor?.textContent?.trim() || ""; + const href = anchor?.getAttribute("href") || ""; + if (!title || !href) + continue; + const snippetElement = card.querySelector("div.b_caption p, div.b_snippet, p"); + const snippet = snippetElement?.textContent?.trim() || undefined; + parsed.push({ title, url: href, snippet }); + if (parsed.length >= maxResults) + break; + } + return parsed; + }, limit); + return items.flatMap(result => { + try { + const absolute = new URL(result.url, "https://www.bing.com/").toString(); + const decoded = this.decodeBingRedirect(absolute); + new URL(decoded); + return [{ ...result, url: decoded, source: "bing" }]; + } + catch { + return []; + } + }); + } + finally { + await page.close(); + } + }); + searchCache.set(cacheKey, results); + return results; + } + + async isAvailable() { + try { + await browserPool.getBrowser(); + return true; + } + catch { + return false; + } + } +} diff --git a/docker/web-search/patch-mcp-web-search.mjs b/docker/web-search/patch-mcp-web-search.mjs new file mode 100644 index 0000000..f5b4f8d --- /dev/null +++ b/docker/web-search/patch-mcp-web-search.mjs @@ -0,0 +1,28 @@ +import fs from "node:fs"; + +// Context Kit patch for @zhafron/mcp-web-search 1.3.0. +// Upstream hard-codes the fetch_url schema limit to 25 MiB even though the +// runtime extractor already uses MAX_BYTES. Keep this narrow and fail the build +// if upstream changes the compiled source shape. 
+const serverPath = "/usr/local/lib/node_modules/@zhafron/mcp-web-search/dist/src/server.js"; +let source = fs.readFileSync(serverPath, "utf8"); + +const replacements = [ + [ + 'import { MAX_RESULTS } from "./constants.js";', + 'import { MAX_BYTES, MAX_RESULTS } from "./constants.js";' + ], + [ + "max_download_bytes: z.number().int().min(1).max(26214400).optional()", + "max_download_bytes: z.number().int().min(1).max(MAX_BYTES).optional()" + ] +]; + +for (const [before, after] of replacements) { + if (!source.includes(before)) { + throw new Error(`mcp-web-search patch target not found: ${before}`); + } + source = source.replace(before, after); +} + +fs.writeFileSync(serverPath, source); diff --git a/docker/web-search/searxng/settings.yml b/docker/web-search/searxng/settings.yml index 64b89fa..de0d4fe 100644 --- a/docker/web-search/searxng/settings.yml +++ b/docker/web-search/searxng/settings.yml @@ -15,8 +15,8 @@ search: - json server: - # Local placeholder. The Docker service also sets SEARXNG_SECRET from .env; - # keep SearXNG bound to 127.0.0.1 unless you review this config separately. + # Local placeholder. Keep SearXNG bound to 127.0.0.1 unless you review this + # config and replace the secret_key for a deliberate non-local deployment. secret_key: "local-only-change-if-exposed" limiter: false image_proxy: true diff --git a/docs/configuration.md b/docs/configuration.md index fd6aaf7..c01fd66 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -14,6 +14,13 @@ shell code. 
| `CONTEXT_KIT_DATA_DIR` | `$HOME/.local/share/context-kit` | Persistent docs indexes and model cache | | `CONTEXT_KIT_COMPOSE_PROJECT` | `context-kit` | Docker Compose project and network prefix | | `CONTEXT_KIT_SEARXNG_PORT` | `8099` | Localhost SearXNG port | +| `CONTEXT_KIT_WEB_SEARCH_MAX_BYTES` | `52428800` | Max bytes `context-web-search` accepts and downloads per fetch | +| `CONTEXT_KIT_WEB_SEARCH_PROVIDER` | `searxng` | Default `search_web` provider; fallback order depends on this provider | +| `CONTEXT_KIT_WEB_SEARCH_HTTP_TIMEOUT` | `15000` | HTTP timeout in milliseconds for search providers | +| `CONTEXT_KIT_WEB_SEARCH_MAX_RESULTS` | `10` | Default search result count when clients omit `limit` | +| `CONTEXT_KIT_WEB_SEARCH_CHROME_PATH` | `/usr/bin/chromium` | Chromium path inside the web-search image for Bing fallback | +| `CONTEXT_KIT_WEB_SEARCH_BROWSER_USER_AGENT` | bundled Chrome/Linux UA | User agent for the Chromium-backed Bing fallback | +| `CONTEXT_KIT_WEB_SEARCH_MCP_COMPAT_MODE` | unset | Set to `legacy` for MCP clients with weak tool-schema parsers | | `CONTEXT_KIT_DOCS_PORT` | `8776` | Localhost port for the long-lived docs-mcp HTTP service | | `CONTEXT_KIT_DOCS_HTTP_URL` | `http://127.0.0.1:${CONTEXT_KIT_DOCS_PORT}/mcp` | URL emitted into HTTP MCP install snippets | | `CONTEXT_KIT_DOCS_ALLOW_ORIGIN` | unset | Optional exact browser CORS origin(s) for docs-mcp, separated by spaces | @@ -22,6 +29,8 @@ shell code. 
| `CONTEXT_KIT_DOCS_MAX_GET_BYTES` | `75000` | Max bytes returned by docs retrieval | | `CONTEXT_KIT_DOCS_EMBED_MODEL` | `BAAI/bge-small-en-v1.5` | SentenceTransformers embedding model | | `CONTEXT_KIT_DOCS_PREINDEX` | `0` | Set to `1` to re-embed every source on container start | +| `CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR` | `${CONTEXT_KIT_DATA_DIR}/local-sources` | Machine-local llms.txt tree mounted read-only into docs-mcp | +| `CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT` | `8769` | Loopback port inside docs-mcp for serving local source files | ## TTL Guidance @@ -66,3 +75,8 @@ CONTEXT_KIT_DOCS_SOURCES="config/sources.default.txt config/sources.js.txt" ``` Each source file is plain text. Blank lines and `#` comments are ignored. +Entries may be absolute source-profile paths for private machine-local config. +For local llms.txt files, place content under +`CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR` and reference it as +`http://127.0.0.1:8769/path/inside/local-sources.txt`; that loopback URL is +inside the docs-mcp container, not exposed on the host. diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 74bbb5f..f400346 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -33,6 +33,37 @@ Build default images: bin/context-kit build ``` +## Fetch URL Says Max Download Bytes Is Too Big + +If `fetch_url` fails before making a network request with an MCP validation error +like `Number must be less than or equal to 26214400`, rebuild the web-search MCP +image: + +```sh +bin/context-kit build +``` + +Context Kit patches the upstream `mcp-web-search` schema so the accepted +`max_download_bytes` value matches `CONTEXT_KIT_WEB_SEARCH_MAX_BYTES`, which +defaults to `52428800`. + +## Search Fallback and Chromium + +`search_web` defaults to SearXNG. If SearXNG fails or returns no results, the +upstream fallback order is DuckDuckGo, then Bing. 
Bing uses Chromium through +Puppeteer, so `bin/context-kit doctor` checks that the configured Chromium path +exists inside the web-search image. + +Context Kit carries a source-controlled Bing provider override in +`docker/web-search/overrides/bing.js` because the upstream 1.3.0 provider can +race result rendering and return no items even when Chromium sees Bing result +cards. The override waits for result cards and decodes current Bing redirect +URLs before handing results back to the upstream fallback registry. + +`fetch_url` is different: in upstream `mcp-web-search` 1.3.0, `engine=browser` is +accepted but reserved for future support. It does not currently invoke Chromium; +URL fetching uses the HTTP extractor path. + ## Docs Indexing Is Slow The first run downloads an embedding model and embeds every configured docs diff --git a/scripts/release-check b/scripts/release-check new file mode 100755 index 0000000..0aa2e1c --- /dev/null +++ b/scripts/release-check @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)"
+cd "${ROOT}"
+
+tmp_dir="$(mktemp -d)"
+cleanup() {
+  rm -rf "${tmp_dir}"
+}
+trap cleanup EXIT
+
+check_node() {
+  local file
+  for file in "$@"; do
+    node --check "${file}"
+  done
+}
+
+git diff --check
+git ls-files --cached --error-unmatch \
+  docker/web-search/patch-mcp-web-search.mjs \
+  docker/web-search/overrides/bing.js \
+  docker/docs/constraints.txt \
+  scripts/smoke-web-search.mjs \
+  scripts/release-check >/dev/null
+bash -n bin/context-kit
+bash -n scripts/release-check
+sh -n docker/docs/entrypoint.sh
+check_node docker/web-search/patch-mcp-web-search.mjs docker/web-search/overrides/bing.js scripts/smoke-web-search.mjs
+
+node -e 'const fs=require("node:fs"); JSON.parse(fs.readFileSync("snippets/opencode.json", "utf8")); JSON.parse(fs.readFileSync("snippets/claude.mcp.json", "utf8"));'
+bin/context-kit install opencode > "${tmp_dir}/opencode.json"
+bin/context-kit install opencode --absolute > "${tmp_dir}/opencode-absolute.json"
+bin/context-kit install claude > "${tmp_dir}/claude.json"
+bin/context-kit install claude --absolute > "${tmp_dir}/claude-absolute.json"
+node -e 'const fs=require("node:fs"); for (const file of process.argv.slice(1)) JSON.parse(fs.readFileSync(file, "utf8"));' \
+  "${tmp_dir}/opencode.json" \
+  "${tmp_dir}/opencode-absolute.json" \
+  "${tmp_dir}/claude.json" \
+  "${tmp_dir}/claude-absolute.json"
+
+bin/context-kit redaction-check
+docker compose -p context-kit -f compose.yml config >/dev/null
+bin/context-kit build
+bin/context-kit doctor
+node scripts/smoke-web-search.mjs bin/context-kit web-search
+
+printf 'pass release-check\n'
diff --git a/scripts/smoke-web-search.mjs b/scripts/smoke-web-search.mjs
new file mode 100644
index 0000000..28bcda5
--- /dev/null
+++ b/scripts/smoke-web-search.mjs
@@ -0,0 +1,209 @@
+import { spawn } from "node:child_process";
+import { fileURLToPath } from "node:url";
+
+// Smoke test for the context-web-search MCP server: spawns the given command,
+// speaks newline-delimited JSON-RPC over its stdio, and checks search_web,
+// fetch_url, and the localhost guard. Exits non-zero on the first failure.
+
+const command = process.argv[2];
+const args = process.argv.slice(3);
+
+if (!command) {
+  throw new Error("usage: node scripts/smoke-web-search.mjs COMMAND [args...]");
+}
+
+// Run the server from the repository root. fileURLToPath() decodes URL escapes,
+// so a checkout under a path containing spaces still resolves.
+const child = spawn(command, args, {
+  cwd: fileURLToPath(new URL("..", import.meta.url)),
+  env: process.env,
+  stdio: ["pipe", "pipe", "pipe"]
+});
+
+// Decode per stream so a multi-byte UTF-8 character split across two chunks
+// is not corrupted.
+child.stdout.setEncoding("utf8");
+child.stderr.setEncoding("utf8");
+
+let nextId = 1;
+// JSON-RPC request id -> { resolve, reject } for the caller awaiting the reply.
+const pending = new Map();
+let stdoutBuffer = "";
+let stderrBuffer = "";
+// True once no further "exit" event can arrive (child exited or never spawned).
+let childGone = false;
+
+// Reject every in-flight request so callers fail now rather than at the timeout.
+function failPending(error) {
+  for (const { reject } of pending.values()) reject(error);
+  pending.clear();
+}
+
+child.on("error", error => {
+  // A failed spawn leaves pid undefined and may never emit "exit".
+  if (child.pid === undefined) childGone = true;
+  failPending(new Error(`MCP child process error: ${error.message}`));
+});
+
+child.once("exit", (code, signal) => {
+  childGone = true;
+  failPending(new Error(`MCP server exited early (code=${code}, signal=${signal})`));
+});
+
+// Writing to a dead child raises EPIPE on stdin; report it to the waiting caller.
+child.stdin.on("error", error => failPending(error));
+
+// Stop the child with SIGTERM, escalating to SIGKILL after 3s. Resolves once it
+// has exited, or immediately when it is already gone.
+function stopChild() {
+  if (childGone) return Promise.resolve();
+  return new Promise(resolve => {
+    const killTimer = setTimeout(() => child.kill("SIGKILL"), 3000);
+    child.once("exit", () => {
+      clearTimeout(killTimer);
+      resolve();
+    });
+    child.stdin.end();
+    child.kill("SIGTERM");
+  });
+}
+
+// Watchdog for the whole run.
+const timeout = setTimeout(async () => {
+  await stopChild();
+  console.error(`MCP smoke timed out. stderr: ${stderrBuffer.slice(-2000)}`);
+  process.exit(1);
+}, 120000);
+
+child.stderr.on("data", chunk => {
+  stderrBuffer += chunk;
+});
+
+// Split stdout into lines and settle the pending request matching each reply.
+child.stdout.on("data", chunk => {
+  stdoutBuffer += chunk;
+  let newline;
+  while ((newline = stdoutBuffer.indexOf("\n")) >= 0) {
+    const line = stdoutBuffer.slice(0, newline).trim();
+    stdoutBuffer = stdoutBuffer.slice(newline + 1);
+    if (!line) continue;
+    let message;
+    try {
+      message = JSON.parse(line);
+    } catch {
+      // Not JSON (for example a log line on stdout); ignore it.
+      continue;
+    }
+    if (message?.id && pending.has(message.id)) {
+      const { resolve, reject } = pending.get(message.id);
+      pending.delete(message.id);
+      if (message.error) reject(new Error(JSON.stringify(message.error)));
+      else resolve(message.result);
+    }
+  }
+});
+
+// Send a JSON-RPC request; the promise settles with the matching reply.
+function request(method, params = {}) {
+  if (childGone) return Promise.reject(new Error(`MCP server is not running; cannot send ${method}`));
+  const id = nextId++;
+  return new Promise((resolve, reject) => {
+    pending.set(id, { resolve, reject });
+    child.stdin.write(`${JSON.stringify({ jsonrpc: "2.0", id, method, params })}\n`);
+  });
+}
+
+// Send a JSON-RPC notification (no id, so no reply is awaited).
+function notify(method, params = {}) {
+  child.stdin.write(`${JSON.stringify({ jsonrpc: "2.0", method, params })}\n`);
+}
+
+// Join the text parts of a tool result into one string.
+function textFrom(result) {
+  return (result?.content || [])
+    .filter(part => part.type === "text")
+    .map(part => part.text)
+    .join("\n");
+}
+
+// Invoke an MCP tool by name via tools/call.
+async function callTool(name, args = {}) {
+  return request("tools/call", { name, arguments: args });
+}
+
+// Text of a successful tool call. Throws when the server flags isError, so an
+// error message that happens to echo the query cannot satisfy a check.
+function okText(label, result) {
+  const text = textFrom(result);
+  if (result?.isError) throw new Error(`${label} returned a tool error: ${text.slice(0, 500)}`);
+  return text;
+}
+
+try {
+  await request("initialize", {
+    protocolVersion: "2024-11-05",
+    capabilities: {},
+    clientInfo: { name: "context-kit-smoke", version: "0.0.0" }
+  });
+  notify("notifications/initialized");
+
+  const listed = await request("tools/list");
+  const toolNames = new Set((listed.tools || []).map(tool => tool.name));
+  for (const name of ["search_web", "fetch_url"]) {
+    if (!toolNames.has(name)) throw new Error(`missing tool: ${name}`);
+  }
+
+  const searxng = okText("SearXNG search", await callTool("search_web", {
+    q: "Model Context Protocol",
+    limit: 2,
+    provider: "searxng"
+  }));
+  if (!searxng.includes("Model")) throw new Error(`SearXNG smoke returned unexpected text: ${searxng.slice(0, 500)}`);
+
+  const bing = okText("Bing search", await callTool("search_web", {
+    q: "Model Context Protocol",
+    limit: 2,
+    provider: "bing"
+  }));
+  if (!bing.includes("Model")) throw new Error(`Bing smoke returned unexpected text: ${bing.slice(0, 500)}`);
+
+  const fetched = okText("fetch_url", await callTool("fetch_url", {
+    url: "https://example.com/",
+    format: "markdown",
+    max_download_bytes: 52428800
+  }));
+  if (!fetched.includes("Example Domain")) throw new Error(`fetch smoke returned unexpected text: ${fetched.slice(0, 500)}`);
+
+  const browserFetch = okText("fetch_url engine=browser", await callTool("fetch_url", {
+    url: "https://example.com/",
+    format: "markdown",
+    engine: "browser",
+    max_download_bytes: 52428800
+  }));
+  if (!browserFetch.includes("Example Domain")) throw new Error(`browser fetch smoke returned unexpected text: ${browserFetch.slice(0, 500)}`);
+
+  // 127.0.0.1 must be refused by the server's localhost/private-URL guard.
+  const localResult = await callTool("fetch_url", {
+    url: "http://127.0.0.1:1/",
+    max_download_bytes: 52428800
+  });
+  const localBlocked = Boolean(localResult.isError) && textFrom(localResult).includes("Blocked localhost/private URL");
+  if (!localBlocked) throw new Error("localhost/private URL was not blocked as expected");
+
+  clearTimeout(timeout);
+  await stopChild();
+  console.log(JSON.stringify({
+    tools: Array.from(toolNames).sort(),
+    searxng: "pass",
+    bing: "pass",
+    fetch_url: "pass",
+    fetch_url_browser_engine_currently_http: "pass",
+    localhost_guard: "pass"
+  }, null, 2));
+} catch (error) {
+  clearTimeout(timeout);
+  await stopChild();
+  console.error(error.message);
+  if (stderrBuffer) console.error(stderrBuffer.slice(-4000));
+  process.exit(1);
+}
diff --git a/snippets/opencode.json b/snippets/opencode.json
index fb783e6..1646f34 100644
--- a/snippets/opencode.json
+++ b/snippets/opencode.json
@@ -5,12 +5,13 @@
       "type": "local",
       "command": ["context-kit", "web-search"],
       "enabled": true,
-      "timeout": 60000
+      "timeout": 150000
     },
     "context-docs": {
       "type": "remote",
       "url": "http://127.0.0.1:8776/mcp",
-      "enabled": true
+      "enabled": true,
+      "timeout": 150000
     },
     "context-repomix": {
       "type": "local",