Harden web search and docs defaults
This commit is contained in:
21
.env.example
21
.env.example
@@ -10,9 +10,19 @@ CONTEXT_KIT_COMPOSE_PROJECT=context-kit
|
||||
# Local SearXNG port. Bound to 127.0.0.1 only.
|
||||
CONTEXT_KIT_SEARXNG_PORT=8099
|
||||
|
||||
# Local-only SearXNG secret. Set this to any random string if you expose SearXNG
|
||||
# beyond localhost, which the default setup does not do.
|
||||
CONTEXT_KIT_SEARXNG_SECRET=change-me-local-only
|
||||
# Max bytes accepted and downloaded by context-web-search fetch_url.
|
||||
# Keep this aligned with agent tool-call defaults to avoid schema rejections.
|
||||
CONTEXT_KIT_WEB_SEARCH_MAX_BYTES=52428800
|
||||
|
||||
# Web-search defaults. Search uses SearXNG first, then falls back to
|
||||
# DuckDuckGo and Bing. Bing requires Chromium inside the web-search image.
|
||||
CONTEXT_KIT_WEB_SEARCH_PROVIDER=searxng
|
||||
CONTEXT_KIT_WEB_SEARCH_HTTP_TIMEOUT=15000
|
||||
CONTEXT_KIT_WEB_SEARCH_MAX_RESULTS=10
|
||||
CONTEXT_KIT_WEB_SEARCH_CHROME_PATH=/usr/bin/chromium
|
||||
# User agent used by the Chromium-backed Bing search fallback.
|
||||
# CONTEXT_KIT_WEB_SEARCH_BROWSER_USER_AGENT="Mozilla/5.0 ..."
|
||||
# CONTEXT_KIT_WEB_SEARCH_MCP_COMPAT_MODE=legacy
|
||||
|
||||
# Long-lived context-docs HTTP MCP service. Bound to 127.0.0.1 only.
|
||||
CONTEXT_KIT_DOCS_PORT=8776
|
||||
@@ -33,3 +43,8 @@ CONTEXT_KIT_DOCS_EMBED_MODEL=BAAI/bge-small-en-v1.5
|
||||
|
||||
# One or more source files, separated by spaces.
|
||||
CONTEXT_KIT_DOCS_SOURCES=config/sources.default.txt
|
||||
|
||||
# Optional machine-local llms.txt tree. Files are served only inside docs-mcp at
|
||||
# http://127.0.0.1:8769/ so absolute local paths do not leak into source files.
|
||||
# CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR=/path/to/context-kit-local-sources
|
||||
# CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT=8769
|
||||
|
||||
11
README.md
11
README.md
@@ -10,7 +10,7 @@ Context Kit gives coding agents three local MCP servers:
|
||||
|
||||
| Server | Purpose | Default |
|
||||
|---|---|---|
|
||||
| `context-web-search` | Current web search and URL fetch through local SearXNG | Enabled |
|
||||
| `context-web-search` | Current web search through local SearXNG plus URL fetch/extract | Enabled |
|
||||
| `context-docs` | Semantic search over curated `llms.txt` documentation | Enabled |
|
||||
| `context-repomix` | Pack local or remote repositories into AI-friendly context | Enabled |
|
||||
|
||||
@@ -56,6 +56,10 @@ config that will not be committed.
|
||||
## Defaults
|
||||
|
||||
- SearXNG binds to `127.0.0.1:8099` only.
|
||||
- `context-web-search` defaults `search_web` to SearXNG, then falls back to
|
||||
DuckDuckGo and Bing. Bing uses Chromium inside the web-search image.
|
||||
- `fetch_url` uses upstream HTTP extraction. In `mcp-web-search` 1.3.0,
|
||||
`engine=browser` is accepted but does not invoke Chromium yet.
|
||||
- `context-docs` runs as a long-lived service on `127.0.0.1:8776` (Streamable
|
||||
HTTP MCP) so every client shares one indexer and one Chroma writer. The
|
||||
`bin/context-kit docs` stdio command is kept as a compatibility shim for
|
||||
@@ -74,7 +78,6 @@ The default docs index is intentionally small:
|
||||
|
||||
- Claude Code docs
|
||||
- OpenAI API docs and reference
|
||||
- Anthropic docs
|
||||
- OpenRouter docs
|
||||
- Model Context Protocol docs
|
||||
|
||||
@@ -91,8 +94,8 @@ CONTEXT_KIT_DOCS_SOURCES="config/sources.default.txt config/sources.js.txt" \
|
||||
bin/context-kit docs
|
||||
```
|
||||
|
||||
Cloudflare is opt-in because it can expand to thousands of sections and take a
|
||||
while to embed.
|
||||
Large vendor feeds are opt-in because they can expand to thousands of sections
|
||||
and take a while to embed.
|
||||
|
||||
## Commands
|
||||
|
||||
|
||||
103
bin/context-kit
103
bin/context-kit
@@ -1,7 +1,17 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
SCRIPT_PATH="${BASH_SOURCE[0]}"
|
||||
while [[ -L "${SCRIPT_PATH}" ]]; do
|
||||
SCRIPT_DIR="$(cd -P "$(dirname "${SCRIPT_PATH}")" && pwd)"
|
||||
SCRIPT_TARGET="$(readlink "${SCRIPT_PATH}")"
|
||||
if [[ "${SCRIPT_TARGET}" = /* ]]; then
|
||||
SCRIPT_PATH="${SCRIPT_TARGET}"
|
||||
else
|
||||
SCRIPT_PATH="${SCRIPT_DIR}/${SCRIPT_TARGET}"
|
||||
fi
|
||||
done
|
||||
ROOT="$(cd -P "$(dirname "${SCRIPT_PATH}")/.." && pwd)"
|
||||
ENV_FILE="${ROOT}/.env"
|
||||
|
||||
load_env_file() {
|
||||
@@ -39,10 +49,19 @@ NETWORK="${CONTEXT_KIT_DOCKER_NETWORK:-${PROJECT}_default}"
|
||||
SEARXNG_PORT="${CONTEXT_KIT_SEARXNG_PORT:-8099}"
|
||||
DOCS_PORT="${CONTEXT_KIT_DOCS_PORT:-8776}"
|
||||
DOCS_HTTP_URL="${CONTEXT_KIT_DOCS_HTTP_URL:-http://127.0.0.1:${DOCS_PORT}/mcp}"
|
||||
WEB_SEARCH_MAX_BYTES="${CONTEXT_KIT_WEB_SEARCH_MAX_BYTES:-52428800}"
|
||||
WEB_SEARCH_PROVIDER="${CONTEXT_KIT_WEB_SEARCH_PROVIDER:-${DEFAULT_SEARCH_PROVIDER:-searxng}}"
|
||||
WEB_SEARCH_HTTP_TIMEOUT="${CONTEXT_KIT_WEB_SEARCH_HTTP_TIMEOUT:-${HTTP_TIMEOUT:-15000}}"
|
||||
WEB_SEARCH_MAX_RESULTS="${CONTEXT_KIT_WEB_SEARCH_MAX_RESULTS:-${MAX_RESULTS:-10}}"
|
||||
WEB_SEARCH_CHROME_PATH="${CONTEXT_KIT_WEB_SEARCH_CHROME_PATH:-${CHROME_PATH:-/usr/bin/chromium}}"
|
||||
WEB_SEARCH_BROWSER_USER_AGENT="${CONTEXT_KIT_WEB_SEARCH_BROWSER_USER_AGENT:-${BROWSER_SEARCH_USER_AGENT:-}}"
|
||||
WEB_SEARCH_MCP_COMPAT_MODE="${CONTEXT_KIT_WEB_SEARCH_MCP_COMPAT_MODE:-${MCP_COMPAT_MODE:-}}"
|
||||
DOCS_CONTAINER_NAME="context-kit-docs-mcp"
|
||||
DOCS_SOURCES_FILE="${DATA_DIR}/docs-sources.txt"
|
||||
DOCS_DATA_DIR="${DATA_DIR}/docs"
|
||||
MODELS_DATA_DIR="${DATA_DIR}/models"
|
||||
DOCS_LOCAL_SOURCES_DIR="${CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR:-${DATA_DIR}/local-sources}"
|
||||
DOCS_LOCAL_SOURCES_PORT="${CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT:-8769}"
|
||||
|
||||
WEB_SEARCH_IMAGE="${CONTEXT_KIT_WEB_SEARCH_IMAGE:-context-kit/web-search-mcp:latest}"
|
||||
DOCS_IMAGE="${CONTEXT_KIT_DOCS_IMAGE:-context-kit/docs-mcp:latest}"
|
||||
@@ -86,6 +105,8 @@ compose() {
|
||||
CONTEXT_KIT_DOCS_MAX_GET_BYTES="${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}" \
|
||||
CONTEXT_KIT_DOCS_EMBED_MODEL="${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}" \
|
||||
CONTEXT_KIT_DOCS_PREINDEX="${CONTEXT_KIT_DOCS_PREINDEX:-0}" \
|
||||
CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR="${DOCS_LOCAL_SOURCES_DIR}" \
|
||||
CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT="${DOCS_LOCAL_SOURCES_PORT}" \
|
||||
BUILDX_BUILDER="${CONTEXT_KIT_BUILDX_BUILDER:-${BUILDX_BUILDER:-default}}" \
|
||||
docker compose -p "${PROJECT}" -f "${COMPOSE_FILE}" "$@"
|
||||
}
|
||||
@@ -112,11 +133,12 @@ prepare_data_dirs() {
|
||||
ensure_writable_dir "${DATA_DIR}"
|
||||
ensure_writable_dir "${DOCS_DATA_DIR}"
|
||||
ensure_writable_dir "${MODELS_DATA_DIR}"
|
||||
ensure_writable_dir "${DOCS_LOCAL_SOURCES_DIR}"
|
||||
}
|
||||
|
||||
check_data_dirs() {
|
||||
local ok=0 dir
|
||||
for dir in "${DATA_DIR}" "${DOCS_DATA_DIR}" "${MODELS_DATA_DIR}"; do
|
||||
for dir in "${DATA_DIR}" "${DOCS_DATA_DIR}" "${MODELS_DATA_DIR}" "${DOCS_LOCAL_SOURCES_DIR}"; do
|
||||
if [[ ! -d "${dir}" ]]; then
|
||||
printf 'warn data directory missing: %s (run context-kit start)\n' "${dir}"
|
||||
elif [[ -w "${dir}" && -x "${dir}" ]]; then
|
||||
@@ -129,6 +151,41 @@ check_data_dirs() {
|
||||
return "${ok}"
|
||||
}
|
||||
|
||||
check_web_search_schema_patch() {
  # Probe the web-search image for the fetch_url schema patch.
  # Runs node inside ${WEB_SEARCH_IMAGE}, reads the installed server.js and
  # exits non-zero unless it contains the MAX_BYTES-based max_download_bytes
  # schema line. All probe output is discarded; only the exit status matters.
  #
  # NOTE(review): MAX_BYTES and EXPECTED_MAX_BYTES are both populated from
  # WEB_SEARCH_MAX_BYTES here, so the "actual !== expected" comparison in the
  # probe appears to always hold -- confirm whether the image's baked-in
  # MAX_BYTES was meant to be compared instead.
  local probe_script
  probe_script='
const fs = require("node:fs");
const expected = Number(process.env.EXPECTED_MAX_BYTES) || 0;
const actual = Number(process.env.MAX_BYTES) || 0;
const serverPath = "/usr/local/lib/node_modules/@zhafron/mcp-web-search/dist/src/server.js";
const source = fs.readFileSync(serverPath, "utf8");
if (actual !== expected) process.exit(1);
if (!source.includes("max_download_bytes: z.number().int().min(1).max(MAX_BYTES).optional()")) process.exit(1);
'

  docker run --rm --entrypoint node \
    --env MAX_BYTES="${WEB_SEARCH_MAX_BYTES}" \
    --env EXPECTED_MAX_BYTES="${WEB_SEARCH_MAX_BYTES}" \
    "${WEB_SEARCH_IMAGE}" \
    -e "${probe_script}" >/dev/null 2>&1
}
|
||||
|
||||
check_web_search_bing_override() {
  # Probe the web-search image for the Context Kit Bing provider override.
  # Runs node inside ${WEB_SEARCH_IMAGE}, reads the installed providers/bing.js
  # and exits non-zero unless it carries the override banner plus the
  # waitForSelector and decodeBingRedirect markers. Output is discarded; only
  # the exit status matters.
  local probe_script
  probe_script='
const fs = require("node:fs");
const bingPath = "/usr/local/lib/node_modules/@zhafron/mcp-web-search/dist/src/providers/bing.js";
const source = fs.readFileSync(bingPath, "utf8");
if (!source.includes("Context Kit override for @zhafron/mcp-web-search 1.3.0")) process.exit(1);
if (!source.includes("waitForSelector")) process.exit(1);
if (!source.includes("decodeBingRedirect")) process.exit(1);
'

  docker run --rm --entrypoint node \
    "${WEB_SEARCH_IMAGE}" \
    -e "${probe_script}" >/dev/null 2>&1
}
|
||||
|
||||
check_web_search_chrome() {
  # Succeeds when ${WEB_SEARCH_CHROME_PATH} is an executable file inside
  # ${WEB_SEARCH_IMAGE}. The image entrypoint is swapped for /usr/bin/test so
  # the container does nothing but evaluate `-x <path>`; output is discarded.
  local -a probe=(
    docker run --rm --entrypoint /usr/bin/test
    "${WEB_SEARCH_IMAGE}"
    -x "${WEB_SEARCH_CHROME_PATH}"
  )

  "${probe[@]}" >/dev/null 2>&1
}
|
||||
|
||||
warn() {
|
||||
printf 'warn: %s\n' "$*" >&2
|
||||
}
|
||||
@@ -257,9 +314,12 @@ cmd_status() {
|
||||
printf '\nImages\n'
|
||||
docker image ls --format '{{.Repository}}:{{.Tag}}\t{{.Size}}' \
|
||||
| grep -E '^(context-kit/|ghcr.io/yamadashy/repomix:)' || true
|
||||
printf '\nLabeled containers\n'
|
||||
docker ps -a --filter label=dev.context-kit=true --format 'table {{.Names}}\t{{.Status}}\t{{.Image}}'
|
||||
printf '\nDocs MCP endpoint\n- %s (container: %s)\n' "${DOCS_HTTP_URL}" "${DOCS_CONTAINER_NAME}"
|
||||
printf '\nDocs sources\n'
|
||||
resolved_sources | sed 's/^/- /'
|
||||
printf '\nLocal docs source directory\n- %s (served inside docs-mcp at http://127.0.0.1:%s/)\n' "${DOCS_LOCAL_SOURCES_DIR}" "${DOCS_LOCAL_SOURCES_PORT}"
|
||||
printf '\nData directory\n- %s\n' "${DATA_DIR}"
|
||||
}
|
||||
|
||||
@@ -303,6 +363,27 @@ cmd_doctor() {
|
||||
fi
|
||||
done
|
||||
|
||||
if docker image inspect "${WEB_SEARCH_IMAGE}" >/dev/null 2>&1; then
|
||||
if check_web_search_schema_patch; then
|
||||
printf 'pass web-search fetch_url max-bytes schema patch: %s\n' "${WEB_SEARCH_MAX_BYTES}"
|
||||
else
|
||||
printf 'fail web-search max-bytes schema patch missing; run: context-kit build\n'
|
||||
ok=1
|
||||
fi
|
||||
if check_web_search_bing_override; then
|
||||
printf 'pass web-search Bing provider override installed\n'
|
||||
else
|
||||
printf 'fail web-search Bing provider override missing; run: context-kit build\n'
|
||||
ok=1
|
||||
fi
|
||||
if check_web_search_chrome; then
|
||||
printf 'pass web-search Chromium path: %s\n' "${WEB_SEARCH_CHROME_PATH}"
|
||||
else
|
||||
printf 'fail web-search Chromium path unavailable: %s\n' "${WEB_SEARCH_CHROME_PATH}"
|
||||
ok=1
|
||||
fi
|
||||
fi
|
||||
|
||||
if command -v curl >/dev/null 2>&1 && curl -fsS "http://127.0.0.1:${SEARXNG_PORT}/healthz" >/dev/null 2>&1; then
|
||||
printf 'pass SearXNG responds on 127.0.0.1:%s\n' "${SEARXNG_PORT}"
|
||||
else
|
||||
@@ -331,11 +412,14 @@ cmd_web_search() {
|
||||
exec docker run --rm -i \
|
||||
--label dev.context-kit=true \
|
||||
--network "${NETWORK}" \
|
||||
-e DEFAULT_SEARCH_PROVIDER="${DEFAULT_SEARCH_PROVIDER:-searxng}" \
|
||||
-e DEFAULT_SEARCH_PROVIDER="${WEB_SEARCH_PROVIDER}" \
|
||||
-e SEARXNG_URL="${SEARXNG_URL:-http://searxng:8080}" \
|
||||
-e CHROME_PATH="${CHROME_PATH:-/usr/bin/chromium}" \
|
||||
-e HTTP_TIMEOUT="${HTTP_TIMEOUT:-15000}" \
|
||||
-e MAX_RESULTS="${MAX_RESULTS:-10}" \
|
||||
-e CHROME_PATH="${WEB_SEARCH_CHROME_PATH}" \
|
||||
-e HTTP_TIMEOUT="${WEB_SEARCH_HTTP_TIMEOUT}" \
|
||||
-e MAX_BYTES="${WEB_SEARCH_MAX_BYTES}" \
|
||||
-e MAX_RESULTS="${WEB_SEARCH_MAX_RESULTS}" \
|
||||
-e BROWSER_SEARCH_USER_AGENT="${WEB_SEARCH_BROWSER_USER_AGENT}" \
|
||||
-e MCP_COMPAT_MODE="${WEB_SEARCH_MCP_COMPAT_MODE}" \
|
||||
"${WEB_SEARCH_IMAGE}"
|
||||
}
|
||||
|
||||
@@ -397,12 +481,13 @@ print_opencode() {
|
||||
"type": "local",
|
||||
"command": ["${bin}", "web-search"],
|
||||
"enabled": true,
|
||||
"timeout": 60000
|
||||
"timeout": 150000
|
||||
},
|
||||
"context-docs": {
|
||||
"type": "remote",
|
||||
"url": "${url}",
|
||||
"enabled": true
|
||||
"enabled": true,
|
||||
"timeout": 150000
|
||||
},
|
||||
"context-repomix": {
|
||||
"type": "local",
|
||||
@@ -451,7 +536,7 @@ cmd_install() {
|
||||
|
||||
cmd_redaction_check() {
|
||||
local bad=0
|
||||
local local_path_terms='/(home|Users)/[^/[:space:]]+|[A-Za-z]:\\Users\\[^\\[:space:]]+'
|
||||
local local_path_terms='/(home|Users)/[^/[:space:]]+|/data/(projects|opencode-mcp)[^[:space:]]*|[A-Za-z]:\\Users\\[^\\[:space:]]+'
|
||||
local secret_terms='AKIA[0-9A-Z]{16}|BEGIN (RSA |OPENSSH |EC |DSA )?PRIVATE KEY|xox[baprs]-|sk-[A-Za-z0-9_-]{20,}|ghp_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}|glpat-[A-Za-z0-9_-]{20,}|gitea_[A-Za-z0-9_-]{20,}'
|
||||
|
||||
# Scan only what would be published: skip .git plus everything .gitignore
|
||||
|
||||
16
compose.yml
16
compose.yml
@@ -9,7 +9,6 @@ services:
|
||||
environment:
|
||||
BASE_URL: "http://127.0.0.1:${CONTEXT_KIT_SEARXNG_PORT:-8099}/"
|
||||
INSTANCE_NAME: "context-kit-search"
|
||||
SEARXNG_SECRET: "${CONTEXT_KIT_SEARXNG_SECRET:-change-me-local-only}"
|
||||
volumes:
|
||||
- ./docker/web-search/searxng/settings.yml:/etc/searxng/settings.yml:ro
|
||||
- searxng-cache:/var/cache/searxng
|
||||
@@ -19,16 +18,21 @@ services:
|
||||
web-search-mcp:
|
||||
build:
|
||||
context: ./docker/web-search
|
||||
args:
|
||||
MCP_WEB_SEARCH_MAX_BYTES: "${CONTEXT_KIT_WEB_SEARCH_MAX_BYTES:-52428800}"
|
||||
image: context-kit/web-search-mcp:latest
|
||||
profiles: ["mcp"]
|
||||
stdin_open: true
|
||||
tty: false
|
||||
environment:
|
||||
DEFAULT_SEARCH_PROVIDER: "searxng"
|
||||
DEFAULT_SEARCH_PROVIDER: "${CONTEXT_KIT_WEB_SEARCH_PROVIDER:-searxng}"
|
||||
SEARXNG_URL: "http://searxng:8080"
|
||||
CHROME_PATH: "/usr/bin/chromium"
|
||||
HTTP_TIMEOUT: "15000"
|
||||
MAX_RESULTS: "10"
|
||||
CHROME_PATH: "${CONTEXT_KIT_WEB_SEARCH_CHROME_PATH:-/usr/bin/chromium}"
|
||||
HTTP_TIMEOUT: "${CONTEXT_KIT_WEB_SEARCH_HTTP_TIMEOUT:-15000}"
|
||||
MAX_BYTES: "${CONTEXT_KIT_WEB_SEARCH_MAX_BYTES:-52428800}"
|
||||
MAX_RESULTS: "${CONTEXT_KIT_WEB_SEARCH_MAX_RESULTS:-10}"
|
||||
BROWSER_SEARCH_USER_AGENT: "${CONTEXT_KIT_WEB_SEARCH_BROWSER_USER_AGENT:-}"
|
||||
MCP_COMPAT_MODE: "${CONTEXT_KIT_WEB_SEARCH_MCP_COMPAT_MODE:-}"
|
||||
labels:
|
||||
dev.context-kit: "true"
|
||||
|
||||
@@ -53,6 +57,7 @@ services:
|
||||
DOCS_MCP_MAX_GET_BYTES: "${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}"
|
||||
DOCS_MCP_EMBED_MODEL: "${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}"
|
||||
DOCS_MCP_ALLOW_ORIGIN: "${CONTEXT_KIT_DOCS_ALLOW_ORIGIN:-}"
|
||||
DOCS_MCP_LOCAL_SOURCES_PORT: "${CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT:-8769}"
|
||||
# Preindex on startup is off by default; use the docs_refresh tool to
|
||||
# refresh on demand. Set CONTEXT_KIT_DOCS_PREINDEX=1 to restore eager.
|
||||
DOCS_MCP_PREINDEX: "${CONTEXT_KIT_DOCS_PREINDEX:-0}"
|
||||
@@ -60,6 +65,7 @@ services:
|
||||
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs:/data
|
||||
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/models:/models
|
||||
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs-sources.txt:/etc/context-kit/docs-sources.txt:ro
|
||||
- ${CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR:-${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/local-sources}:/etc/context-kit/local-sources:ro
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "python -c \"import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://127.0.0.1:8000/status', timeout=2).status < 500 else 1)\""]
|
||||
interval: 30s
|
||||
|
||||
@@ -4,6 +4,5 @@
|
||||
https://code.claude.com/docs/llms.txt
|
||||
https://developers.openai.com/api/docs/llms.txt
|
||||
https://developers.openai.com/api/reference/llms.txt
|
||||
https://docs.anthropic.com/llms.txt
|
||||
https://openrouter.ai/docs/llms.txt
|
||||
https://modelcontextprotocol.io/llms-full.txt
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
*
|
||||
!Dockerfile
|
||||
!entrypoint.sh
|
||||
!constraints.txt
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
FROM python:3.12-slim
|
||||
FROM python:3.12-slim@sha256:6c4dd321d176d61ea848dc8c73a4f7dbae8f70e0ee48bb411ea2f045b599fa8e
|
||||
|
||||
ARG LLMS_TXT_MCP_VERSION=0.2.0
|
||||
ARG MCP_PROXY_VERSION=0.12.0
|
||||
ARG TORCH_VERSION=2.12.1+cpu
|
||||
|
||||
COPY constraints.txt /tmp/context-kit-docs-constraints.txt
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
@@ -11,17 +14,19 @@ RUN apt-get update \
|
||||
# Install CPU-only torch first so llms-txt-mcp does not pull large CUDA wheels.
|
||||
RUN pip install --no-cache-dir \
|
||||
--index-url https://download.pytorch.org/whl/cpu \
|
||||
torch
|
||||
-c /tmp/context-kit-docs-constraints.txt \
|
||||
"torch==${TORCH_VERSION}"
|
||||
|
||||
# llms-txt-mcp does the indexing/search; mcp-proxy fronts its stdio transport
|
||||
# as Streamable HTTP so multiple MCP clients can share one long-lived process
|
||||
# (and therefore one Chroma DB writer).
|
||||
RUN if [ -n "${LLMS_TXT_MCP_VERSION}" ]; then \
|
||||
pip install --no-cache-dir "llms-txt-mcp==${LLMS_TXT_MCP_VERSION}"; \
|
||||
pip install --no-cache-dir -c /tmp/context-kit-docs-constraints.txt "llms-txt-mcp==${LLMS_TXT_MCP_VERSION}"; \
|
||||
else \
|
||||
pip install --no-cache-dir llms-txt-mcp; \
|
||||
pip install --no-cache-dir -c /tmp/context-kit-docs-constraints.txt llms-txt-mcp; \
|
||||
fi \
|
||||
&& pip install --no-cache-dir "mcp-proxy==${MCP_PROXY_VERSION}"
|
||||
&& pip install --no-cache-dir -c /tmp/context-kit-docs-constraints.txt "mcp-proxy==${MCP_PROXY_VERSION}" \
|
||||
&& rm /tmp/context-kit-docs-constraints.txt
|
||||
|
||||
COPY entrypoint.sh /usr/local/bin/docs-mcp-entrypoint
|
||||
RUN chmod +x /usr/local/bin/docs-mcp-entrypoint
|
||||
|
||||
107
docker/docs/constraints.txt
Normal file
107
docker/docs/constraints.txt
Normal file
@@ -0,0 +1,107 @@
|
||||
aiohappyeyeballs==2.6.2
|
||||
aiohttp==3.14.1
|
||||
aiosignal==1.4.0
|
||||
annotated-doc==0.0.4
|
||||
annotated-types==0.7.0
|
||||
anyio==4.14.1
|
||||
attrs==26.1.0
|
||||
bcrypt==5.0.0
|
||||
build==1.5.0
|
||||
certifi==2026.6.17
|
||||
cffi==2.0.0
|
||||
charset-normalizer==3.4.7
|
||||
chromadb==1.5.9
|
||||
click==8.4.2
|
||||
cryptography==49.0.0
|
||||
durationpy==0.10
|
||||
filelock==3.29.0
|
||||
flatbuffers==25.12.19
|
||||
frozenlist==1.8.0
|
||||
fsspec==2026.4.0
|
||||
googleapis-common-protos==1.75.0
|
||||
grpcio==1.81.1
|
||||
h11==0.16.0
|
||||
hf-xet==1.5.1
|
||||
httpcore==1.0.9
|
||||
httptools==0.8.0
|
||||
httpx==0.28.1
|
||||
httpx-sse==0.4.3
|
||||
httpx_auth==0.23.1
|
||||
huggingface_hub==1.20.1
|
||||
idna==3.18
|
||||
importlib_resources==7.1.0
|
||||
Jinja2==3.1.6
|
||||
joblib==1.5.3
|
||||
jsonschema==4.26.0
|
||||
jsonschema-specifications==2025.9.1
|
||||
kubernetes==36.0.2
|
||||
llms-txt-mcp==0.2.0
|
||||
markdown-it-py==4.2.0
|
||||
MarkupSafe==3.0.3
|
||||
mcp==1.28.0
|
||||
mcp-proxy==0.12.0
|
||||
mdurl==0.1.2
|
||||
mmh3==5.2.1
|
||||
mpmath==1.3.0
|
||||
multidict==6.7.1
|
||||
narwhals==2.22.1
|
||||
networkx==3.6.1
|
||||
numpy==2.5.0
|
||||
oauthlib==3.3.1
|
||||
onnxruntime==1.27.0
|
||||
opentelemetry-api==1.43.0
|
||||
opentelemetry-exporter-otlp-proto-common==1.43.0
|
||||
opentelemetry-exporter-otlp-proto-grpc==1.43.0
|
||||
opentelemetry-proto==1.43.0
|
||||
opentelemetry-sdk==1.43.0
|
||||
opentelemetry-semantic-conventions==0.64b0
|
||||
orjson==3.11.9
|
||||
overrides==7.7.0
|
||||
packaging==26.2
|
||||
propcache==0.5.2
|
||||
protobuf==7.35.1
|
||||
pybase64==1.4.3
|
||||
pycparser==3.0
|
||||
pydantic==2.13.4
|
||||
pydantic-settings==2.14.2
|
||||
pydantic_core==2.46.4
|
||||
Pygments==2.20.0
|
||||
PyJWT==2.13.0
|
||||
PyPika==0.51.1
|
||||
pyproject_hooks==1.2.0
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.2.2
|
||||
python-multipart==0.0.32
|
||||
PyYAML==6.0.3
|
||||
referencing==0.37.0
|
||||
regex==2026.5.9
|
||||
requests==2.34.2
|
||||
requests-oauthlib==2.0.0
|
||||
rich==15.0.0
|
||||
rpds-py==2026.5.1
|
||||
safetensors==0.8.0
|
||||
scikit-learn==1.9.0
|
||||
scipy==1.18.0
|
||||
sentence-transformers==5.6.0
|
||||
setuptools==70.2.0
|
||||
shellingham==1.5.4
|
||||
six==1.17.0
|
||||
sse-starlette==3.4.5
|
||||
starlette==1.3.1
|
||||
sympy==1.14.0
|
||||
tenacity==9.1.4
|
||||
threadpoolctl==3.6.0
|
||||
tokenizers==0.22.2
|
||||
torch==2.12.1+cpu
|
||||
tqdm==4.68.3
|
||||
transformers==5.12.1
|
||||
typer==0.25.1
|
||||
typing-inspection==0.4.2
|
||||
typing_extensions==4.15.0
|
||||
urllib3==2.7.0
|
||||
uvicorn==0.49.0
|
||||
uvloop==0.22.1
|
||||
watchfiles==1.2.0
|
||||
websocket-client==1.9.0
|
||||
websockets==16.0
|
||||
yarl==1.24.2
|
||||
@@ -12,6 +12,8 @@
|
||||
set -eu
|
||||
|
||||
sources_file="${DOCS_MCP_SOURCES_FILE:-/etc/context-kit/docs-sources.txt}"
|
||||
local_sources_dir="${DOCS_MCP_LOCAL_SOURCES_DIR:-/etc/context-kit/local-sources}"
|
||||
local_sources_port="${DOCS_MCP_LOCAL_SOURCES_PORT:-8769}"
|
||||
|
||||
if [ ! -r "$sources_file" ]; then
|
||||
echo "docs-mcp: sources file not readable: $sources_file" >&2
|
||||
@@ -27,11 +29,41 @@ if [ -z "$sources" ]; then
|
||||
exit 64
|
||||
fi
|
||||
|
||||
if [ -d "$local_sources_dir" ]; then
|
||||
python -m http.server "$local_sources_port" \
|
||||
--bind 127.0.0.1 \
|
||||
--directory "$local_sources_dir" \
|
||||
>/tmp/context-kit-local-sources.log 2>&1 &
|
||||
local_sources_pid="$!"
|
||||
if ! python - "$local_sources_port" <<'PY'
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
port = sys.argv[1]
|
||||
last_error = None
|
||||
for _ in range(20):
|
||||
try:
|
||||
with urllib.request.urlopen(f"http://127.0.0.1:{port}/", timeout=0.5) as response:
|
||||
if response.status < 500:
|
||||
raise SystemExit(0)
|
||||
except Exception as error:
|
||||
last_error = error
|
||||
time.sleep(0.1)
|
||||
raise SystemExit(f"local source server did not become ready: {last_error}")
|
||||
PY
|
||||
then
|
||||
kill "$local_sources_pid" 2>/dev/null || true
|
||||
echo "docs-mcp: local source server failed on 127.0.0.1:$local_sources_port" >&2
|
||||
exit 65
|
||||
fi
|
||||
fi
|
||||
|
||||
# By default llms-txt-mcp 0.2.0 re-embeds every source on launch (the actual
|
||||
# default is a background preindex, --no-preindex only disables the foreground
|
||||
# variant). On a long-lived container that just wastes ~5 min of CPU per
|
||||
# restart, so we disable BOTH and let the caller use `docs_refresh` on demand.
|
||||
# Set DOCS_MCP_PREINDEX=1 to restore the eager behavior.
|
||||
# variant). On a long-lived container that wastes CPU per restart, so we disable
|
||||
# BOTH. Missing/stale sources still refresh on first docs_query/docs_refresh.
|
||||
# Set DOCS_MCP_PREINDEX=1 to restore eager startup indexing.
|
||||
preindex_flag="--no-preindex --no-background-preindex"
|
||||
if [ "${DOCS_MCP_PREINDEX:-0}" = "1" ]; then
|
||||
preindex_flag=""
|
||||
|
||||
@@ -1,2 +1,5 @@
|
||||
*
|
||||
!Dockerfile
|
||||
!patch-mcp-web-search.mjs
|
||||
!overrides/
|
||||
!overrides/bing.js
|
||||
|
||||
@@ -1,7 +1,14 @@
|
||||
FROM node:22-bookworm-slim
|
||||
FROM node:22-bookworm-slim@sha256:813a7480f28fdadac1f7f5c824bcdad435b5bc1322a5968bbbdef8d058f9dff4
|
||||
|
||||
ARG MCP_WEB_SEARCH_VERSION=1.3.0
|
||||
ARG MCP_WEB_SEARCH_MAX_BYTES=52428800
|
||||
|
||||
COPY patch-mcp-web-search.mjs /tmp/patch-mcp-web-search.mjs
|
||||
COPY overrides/bing.js /tmp/context-kit-bing-provider.js
|
||||
|
||||
# Chromium intentionally tracks Debian security updates inside the pinned base
|
||||
# image family; Bing's browser path is more likely to break with stale Chromium
|
||||
# than with patched OS packages.
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
@@ -10,11 +17,15 @@ RUN apt-get update \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN npm install -g "@zhafron/mcp-web-search@${MCP_WEB_SEARCH_VERSION}" \
|
||||
&& cp /tmp/context-kit-bing-provider.js /usr/local/lib/node_modules/@zhafron/mcp-web-search/dist/src/providers/bing.js \
|
||||
&& node /tmp/patch-mcp-web-search.mjs \
|
||||
&& rm /tmp/patch-mcp-web-search.mjs /tmp/context-kit-bing-provider.js \
|
||||
&& npm cache clean --force
|
||||
|
||||
ENV CHROME_PATH=/usr/bin/chromium \
|
||||
DEFAULT_SEARCH_PROVIDER=searxng \
|
||||
HTTP_TIMEOUT=15000 \
|
||||
MAX_BYTES=${MCP_WEB_SEARCH_MAX_BYTES} \
|
||||
MAX_RESULTS=10 \
|
||||
SEARXNG_URL=http://searxng:8080
|
||||
|
||||
|
||||
114
docker/web-search/overrides/bing.js
Normal file
114
docker/web-search/overrides/bing.js
Normal file
@@ -0,0 +1,114 @@
|
||||
import { PUPPETEER_TIMEOUT } from "../constants.js";
|
||||
import { browserPool } from "../utils/browser-pool.js";
|
||||
import { getAcceptLanguageHeader, getMarketFromLang } from "../utils/user-agent.js";
|
||||
import { searchCache, createCacheKey } from "../utils/cache.js";
|
||||
|
||||
// Context Kit override for @zhafron/mcp-web-search 1.3.0.
|
||||
// The upstream provider can read Bing before result cards render and return an
|
||||
// empty fallback. Keep this as a direct provider replacement until upstream
|
||||
// waits for cards and decodes current /ck/a redirects reliably.
|
||||
const DEFAULT_BROWSER_SEARCH_USER_AGENT = process.env.BROWSER_SEARCH_USER_AGENT ||
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36";
|
||||
|
||||
/**
 * Decode a URL-safe base64 string ("-" and "_" alphabet, padding optional)
 * into UTF-8 text.
 *
 * @param {string} value - URL-safe base64 payload.
 * @returns {string} The decoded text.
 */
function decodeBase64Url(value) {
    // Map the URL-safe alphabet back onto the standard one.
    const standard = value.replaceAll("-", "+").replaceAll("_", "/");
    // Restore the "=" padding that URL-safe encoders usually strip.
    const missingPadding = (4 - (standard.length % 4)) % 4;
    return Buffer.from(standard + "=".repeat(missingPadding), "base64").toString("utf-8");
}
|
||||
|
||||
/**
 * Chromium-backed Bing search provider (Context Kit replacement for the
 * upstream provider). Loads the Bing results page in a pooled browser, waits
 * for result cards, and returns `{ title, url, snippet, source }` records
 * with Bing's click-tracking redirects unwrapped.
 */
export class BingProvider {
    name = "bing";

    /**
     * Resolve a Bing result link to its destination.
     *
     * Bing wraps results in `/ck/a?...&u=<payload>` redirects where the
     * payload is URL-safe base64, optionally prefixed with a two-character
     * marker (a letter followed by a digit).
     *
     * @param {string} href - Absolute or Bing-relative link.
     * @returns {string} The decoded http(s) destination when the link is a
     *   recognisable redirect, otherwise the normalised absolute URL; the
     *   input is returned untouched if it cannot be parsed as a URL.
     */
    decodeBingRedirect(href) {
        try {
            const url = new URL(href, "https://www.bing.com/");
            // Bing can serve results from regional hosts (any *.bing.com), so
            // do not restrict redirect decoding to www.bing.com.
            const isBingHost = url.hostname === "bing.com" || url.hostname.endsWith(".bing.com");
            if (isBingHost && url.pathname === "/ck/a") {
                const encoded = url.searchParams.get("u");
                if (encoded) {
                    // Try the raw payload first, then the payload without its
                    // two-character marker.
                    const candidates = [encoded];
                    if (/^[a-z][0-9]/i.test(encoded)) candidates.push(encoded.slice(2));
                    for (const candidate of candidates) {
                        try {
                            const decoded = decodeBase64Url(candidate);
                            if (/^https?:\/\//i.test(decoded)) return decoded;
                        }
                        catch {
                            // Not decodable in this framing; try the next one.
                        }
                    }
                }
            }
            return url.toString();
        }
        catch {
            return href;
        }
    }

    /**
     * Run a Bing search through a pooled browser page.
     *
     * @param {string} q - Search query.
     * @param {number} limit - Maximum number of results to collect.
     * @param {string} lang - Language hint used for the market and the
     *   Accept-Language header.
     * @returns {Promise<Array<{title: string, url: string, snippet?: string, source: string}>>}
     * @throws {Error} When Bing answers with an HTTP status >= 400, or when
     *   navigation itself fails.
     */
    async search(q, limit, lang) {
        const cacheKey = createCacheKey("bing", q, limit, lang);
        const cached = searchCache.get(cacheKey);
        if (cached)
            return cached;
        const market = getMarketFromLang(lang);
        const results = await browserPool.withBrowser(async (browser) => {
            const page = await browser.newPage();
            try {
                await page.setViewport({ width: 1365, height: 768 });
                await page.setUserAgent(DEFAULT_BROWSER_SEARCH_USER_AGENT);
                await page.setExtraHTTPHeaders(getAcceptLanguageHeader(lang));
                const url = new URL("https://www.bing.com/search");
                url.searchParams.set("q", q);
                url.searchParams.set("mkt", market);
                const response = await page.goto(url.toString(), {
                    waitUntil: "domcontentloaded",
                    timeout: PUPPETEER_TIMEOUT
                });
                if (response && response.status() >= 400) {
                    throw new Error(`Bing HTTP ${response.status()}`);
                }
                // Best effort: a timeout here just means no cards rendered, in
                // which case the scrape below yields an empty list.
                await page.waitForSelector("li.b_algo h2 a[href], li.b_algo a[href]", { timeout: 10000 }).catch(() => undefined);
                const items = await page.evaluate(maxResults => {
                    const parsed = [];
                    for (const card of Array.from(document.querySelectorAll("li.b_algo"))) {
                        const anchor = card.querySelector("h2 a[href]") || card.querySelector("a[href]");
                        const title = anchor?.textContent?.trim() || "";
                        const href = anchor?.getAttribute("href") || "";
                        if (!title || !href)
                            continue;
                        const snippetElement = card.querySelector("div.b_caption p, div.b_snippet, p");
                        const snippet = snippetElement?.textContent?.trim() || undefined;
                        parsed.push({ title, url: href, snippet });
                        if (parsed.length >= maxResults)
                            break;
                    }
                    return parsed;
                }, limit);
                return items.flatMap(result => {
                    try {
                        const absolute = new URL(result.url, "https://www.bing.com/").toString();
                        const decoded = this.decodeBingRedirect(absolute);
                        // Drop entries whose final URL is not parseable.
                        new URL(decoded);
                        return [{ ...result, url: decoded, source: "bing" }];
                    }
                    catch {
                        return [];
                    }
                });
            }
            finally {
                await page.close();
            }
        });
        // Only remember successful scrapes. An empty list usually means the
        // cards never rendered (consent or bot-check page); caching it would
        // keep returning nothing for this query until the entry expires.
        if (results?.length) {
            searchCache.set(cacheKey, results);
        }
        return results;
    }

    /**
     * Report whether a browser can be obtained from the pool.
     *
     * @returns {Promise<boolean>} `true` when `browserPool.getBrowser()`
     *   resolves, `false` when it rejects.
     */
    async isAvailable() {
        try {
            await browserPool.getBrowser();
            return true;
        }
        catch {
            return false;
        }
    }
}
|
||||
28
docker/web-search/patch-mcp-web-search.mjs
Normal file
28
docker/web-search/patch-mcp-web-search.mjs
Normal file
@@ -0,0 +1,28 @@
|
||||
import fs from "node:fs";
|
||||
|
||||
// Context Kit patch for @zhafron/mcp-web-search 1.3.0.
|
||||
// Upstream hard-codes the fetch_url schema limit to 25 MiB even though the
|
||||
// runtime extractor already uses MAX_BYTES. Keep this narrow and fail the build
|
||||
// if upstream changes the compiled source shape.
|
||||
const serverPath = "/usr/local/lib/node_modules/@zhafron/mcp-web-search/dist/src/server.js";

// Exact upstream text paired with its replacement. The first entry brings
// MAX_BYTES into scope in server.js; the second swaps the hard-coded schema
// ceiling for it.
const replacements = [
    {
        before: 'import { MAX_RESULTS } from "./constants.js";',
        after: 'import { MAX_BYTES, MAX_RESULTS } from "./constants.js";'
    },
    {
        before: "max_download_bytes: z.number().int().min(1).max(26214400).optional()",
        after: "max_download_bytes: z.number().int().min(1).max(MAX_BYTES).optional()"
    }
];

// Apply the replacements in order, aborting (and so failing the image build)
// as soon as one of the expected upstream snippets is missing.
const patched = replacements.reduce((text, { before, after }) => {
    if (!text.includes(before)) {
        throw new Error(`mcp-web-search patch target not found: ${before}`);
    }
    return text.replace(before, after);
}, fs.readFileSync(serverPath, "utf8"));

fs.writeFileSync(serverPath, patched);
|
||||
@@ -15,8 +15,8 @@ search:
|
||||
- json
|
||||
|
||||
server:
|
||||
# Local placeholder. The Docker service also sets SEARXNG_SECRET from .env;
|
||||
# keep SearXNG bound to 127.0.0.1 unless you review this config separately.
|
||||
# Local placeholder. Keep SearXNG bound to 127.0.0.1 unless you review this
|
||||
# config and replace the secret_key for a deliberate non-local deployment.
|
||||
secret_key: "local-only-change-if-exposed"
|
||||
limiter: false
|
||||
image_proxy: true
|
||||
|
||||
@@ -14,6 +14,13 @@ shell code.
|
||||
| `CONTEXT_KIT_DATA_DIR` | `$HOME/.local/share/context-kit` | Persistent docs indexes and model cache |
|
||||
| `CONTEXT_KIT_COMPOSE_PROJECT` | `context-kit` | Docker Compose project and network prefix |
|
||||
| `CONTEXT_KIT_SEARXNG_PORT` | `8099` | Localhost SearXNG port |
|
||||
| `CONTEXT_KIT_WEB_SEARCH_MAX_BYTES` | `52428800` | Max bytes `context-web-search` accepts and downloads per fetch |
|
||||
| `CONTEXT_KIT_WEB_SEARCH_PROVIDER` | `searxng` | Default `search_web` provider; fallback order depends on this provider |
|
||||
| `CONTEXT_KIT_WEB_SEARCH_HTTP_TIMEOUT` | `15000` | HTTP timeout in milliseconds for search providers |
|
||||
| `CONTEXT_KIT_WEB_SEARCH_MAX_RESULTS` | `10` | Default search result count when clients omit `limit` |
|
||||
| `CONTEXT_KIT_WEB_SEARCH_CHROME_PATH` | `/usr/bin/chromium` | Chromium path inside the web-search image for Bing fallback |
|
||||
| `CONTEXT_KIT_WEB_SEARCH_BROWSER_USER_AGENT` | bundled Chrome/Linux UA | User agent for the Chromium-backed Bing fallback |
|
||||
| `CONTEXT_KIT_WEB_SEARCH_MCP_COMPAT_MODE` | unset | Set to `legacy` for MCP clients with weak tool-schema parsers |
|
||||
| `CONTEXT_KIT_DOCS_PORT` | `8776` | Localhost port for the long-lived docs-mcp HTTP service |
|
||||
| `CONTEXT_KIT_DOCS_HTTP_URL` | `http://127.0.0.1:${CONTEXT_KIT_DOCS_PORT}/mcp` | URL emitted into HTTP MCP install snippets |
|
||||
| `CONTEXT_KIT_DOCS_ALLOW_ORIGIN` | unset | Optional exact browser CORS origin(s) for docs-mcp, separated by spaces |
|
||||
@@ -22,6 +29,8 @@ shell code.
|
||||
| `CONTEXT_KIT_DOCS_MAX_GET_BYTES` | `75000` | Max bytes returned by docs retrieval |
|
||||
| `CONTEXT_KIT_DOCS_EMBED_MODEL` | `BAAI/bge-small-en-v1.5` | SentenceTransformers embedding model |
|
||||
| `CONTEXT_KIT_DOCS_PREINDEX` | `0` | Set to `1` to re-embed every source on container start |
|
||||
| `CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR` | `${CONTEXT_KIT_DATA_DIR}/local-sources` | Machine-local llms.txt tree mounted read-only into docs-mcp |
|
||||
| `CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT` | `8769` | Loopback port inside docs-mcp for serving local source files |
|
||||
|
||||
## TTL Guidance
|
||||
|
||||
@@ -66,3 +75,8 @@ CONTEXT_KIT_DOCS_SOURCES="config/sources.default.txt config/sources.js.txt"
|
||||
```
|
||||
|
||||
Each source file is plain text. Blank lines and `#` comments are ignored.
|
||||
Entries may be absolute source-profile paths for private machine-local config.
|
||||
For local llms.txt files, place content under
|
||||
`CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR` and reference it as
|
||||
`http://127.0.0.1:8769/path/inside/local-sources.txt`; that loopback URL is
|
||||
inside the docs-mcp container, not exposed on the host. If you changed `CONTEXT_KIT_DOCS_LOCAL_SOURCES_PORT`, use that port in the URL instead of `8769`.
|
||||
|
||||
@@ -33,6 +33,37 @@ Build default images:
|
||||
bin/context-kit build
|
||||
```
|
||||
|
||||
## Fetch URL Says Max Download Bytes Is Too Big
|
||||
|
||||
If `fetch_url` fails before making a network request with an MCP validation error
|
||||
like `Number must be less than or equal to 26214400`, rebuild the web-search MCP
|
||||
image:
|
||||
|
||||
```sh
|
||||
bin/context-kit build
|
||||
```
|
||||
|
||||
Context Kit patches the upstream `mcp-web-search` schema so the accepted
|
||||
`max_download_bytes` value matches `CONTEXT_KIT_WEB_SEARCH_MAX_BYTES`, which
|
||||
defaults to `52428800`.
|
||||
|
||||
## Search Fallback and Chromium
|
||||
|
||||
`search_web` defaults to SearXNG. If SearXNG fails or returns no results, the
|
||||
upstream fallback order is DuckDuckGo, then Bing. Bing uses Chromium through
|
||||
Puppeteer, so `bin/context-kit doctor` checks that the configured Chromium path
|
||||
exists inside the web-search image.
|
||||
|
||||
Context Kit carries a source-controlled Bing provider override in
|
||||
`docker/web-search/overrides/bing.js` because the upstream 1.3.0 provider can
|
||||
race result rendering and return no items even when Chromium sees Bing result
|
||||
cards. The override waits for result cards and decodes current Bing redirect
|
||||
URLs before handing results back to the upstream fallback registry.
|
||||
|
||||
`fetch_url` is different: in upstream `mcp-web-search` 1.3.0, `engine=browser` is
|
||||
accepted but reserved for future support. It does not currently invoke Chromium;
|
||||
URL fetching uses the HTTP extractor path.
|
||||
|
||||
## Docs Indexing Is Slow
|
||||
|
||||
The first run downloads an embedding model and embeds every configured docs
|
||||
|
||||
49
scripts/release-check
Executable file
49
scripts/release-check
Executable file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env bash
# Release gate for Context Kit: static checks, snippet validation, image build,
# doctor, and the web-search MCP smoke test. Any failing step aborts the run.
set -euo pipefail

# Always run from the repository root, regardless of the caller's cwd.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
ROOT="$(cd "${script_dir}/.." && pwd)"
cd "${ROOT}"

# Scratch directory for generated install snippets.
tmp_dir="$(mktemp -d)"

# Remove the scratch directory; registered below for every exit path.
cleanup() {
  rm -rf "${tmp_dir}"
}
trap cleanup EXIT
|
||||
|
||||
# Syntax-check each given JavaScript file with `node --check`.
# Arguments: one or more file paths. With `set -e` active, the first file that
# fails the check aborts the script.
check_node() {
  local script
  # `for NAME; do` iterates over the positional parameters ("$@").
  for script; do
    node --check "${script}"
  done
}
|
||||
|
||||
# --- Static checks -----------------------------------------------------------
# Whitespace errors in the working tree fail the release.
git diff --check

# Files the release depends on must be tracked by git.
required_files=(
  docker/web-search/patch-mcp-web-search.mjs
  docker/web-search/overrides/bing.js
  docker/docs/constraints.txt
  scripts/smoke-web-search.mjs
  scripts/release-check
)
git ls-files --cached --error-unmatch "${required_files[@]}" >/dev/null

# Shell and JavaScript syntax checks (parse only, nothing is executed).
bash -n bin/context-kit
bash -n scripts/release-check
sh -n docker/docs/entrypoint.sh
check_node docker/web-search/patch-mcp-web-search.mjs docker/web-search/overrides/bing.js scripts/smoke-web-search.mjs

# --- Snippet validation ------------------------------------------------------
# Checked-in snippets must be valid JSON.
node -e 'const fs=require("node:fs"); JSON.parse(fs.readFileSync("snippets/opencode.json", "utf8")); JSON.parse(fs.readFileSync("snippets/claude.mcp.json", "utf8"));'

# Generated install snippets (relative and --absolute) must be valid JSON too.
generated=()
for agent in opencode claude; do
  bin/context-kit install "${agent}" > "${tmp_dir}/${agent}.json"
  bin/context-kit install "${agent}" --absolute > "${tmp_dir}/${agent}-absolute.json"
  generated+=("${tmp_dir}/${agent}.json" "${tmp_dir}/${agent}-absolute.json")
done
node -e 'const fs=require("node:fs"); for (const file of process.argv.slice(1)) JSON.parse(fs.readFileSync(file, "utf8"));' \
  "${generated[@]}"

# --- Runtime checks ----------------------------------------------------------
bin/context-kit redaction-check
docker compose -p context-kit -f compose.yml config >/dev/null
bin/context-kit build
bin/context-kit doctor
node scripts/smoke-web-search.mjs bin/context-kit web-search

printf 'pass release-check\n'
|
||||
152
scripts/smoke-web-search.mjs
Normal file
152
scripts/smoke-web-search.mjs
Normal file
@@ -0,0 +1,152 @@
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
// CLI contract: argv[2] is the MCP server command, everything after it is
// passed through as that command's arguments.
const command = process.argv[2];
const args = process.argv.slice(3);

if (!command) {
  throw new Error("usage: node scripts/smoke-web-search.mjs <command> [args...]");
}

// Launch the MCP server with all three stdio streams piped so this script can
// speak newline-delimited JSON-RPC over stdin/stdout and capture stderr.
const child = spawn(command, args, {
  // Run from the directory above this script. Pass the file: URL object itself:
  // `URL#pathname` stays percent-encoded (and is not a valid Windows path), so
  // using it as cwd breaks when the checkout path contains spaces or
  // non-ASCII characters.
  cwd: new URL("..", import.meta.url),
  env: process.env,
  stdio: ["pipe", "pipe", "pipe"]
});

// JSON-RPC bookkeeping shared by the handlers and helpers below.
let nextId = 1; // id assigned to the next outgoing request
const pending = new Map(); // request id -> { resolve, reject } awaiting a response
let stdoutBuffer = ""; // stdout text not yet terminated by a newline
let stderrBuffer = ""; // all stderr output, kept for failure diagnostics
|
||||
|
||||
/**
 * Shut down the spawned MCP server and wait until it has exited.
 *
 * Closes the child's stdin, sends SIGTERM, and escalates to SIGKILL if the
 * child has not exited within 3 seconds.
 *
 * @returns {Promise<void>} resolves once the child has exited; resolves
 *   immediately when the child is already gone.
 */
function stopChild() {
  // A process that has already exited never emits "exit" again, so waiting for
  // the event would leave this promise pending forever and the callers would
  // never reach their diagnostics / process.exit.
  if (child.exitCode !== null || child.signalCode !== null) {
    return Promise.resolve();
  }

  return new Promise(resolve => {
    let killTimer;
    // Register the listener before signalling so the exit cannot be missed.
    child.once("exit", () => {
      clearTimeout(killTimer);
      resolve();
    });
    child.stdin.end();
    child.kill("SIGTERM");
    // Escalate if SIGTERM is ignored.
    killTimer = setTimeout(() => child.kill("SIGKILL"), 3000);
  });
}
|
||||
|
||||
// Watchdog: if the whole smoke run has not finished after 120 seconds, stop
// the server, print the tail of its stderr, and fail the process.
const timeout = setTimeout(() => {
  stopChild().then(() => {
    console.error(`MCP smoke timed out. stderr: ${stderrBuffer.slice(-2000)}`);
    process.exit(1);
  });
}, 120000);
|
||||
|
||||
// Decode at the stream level: converting each Buffer chunk separately can
// corrupt a multi-byte UTF-8 character that is split across two chunks.
child.stderr.setEncoding("utf8");

// Accumulate everything the server writes to stderr; it is only printed when
// the smoke run fails or times out.
child.stderr.on("data", chunk => {
  stderrBuffer += chunk;
});
|
||||
|
||||
// Decode at the stream level: converting each Buffer chunk separately can
// corrupt a multi-byte UTF-8 character that is split across two chunks, which
// would damage (or make unparseable) the JSON-RPC line containing it.
child.stdout.setEncoding("utf8");

// Read newline-delimited JSON-RPC messages from the server and settle the
// matching pending request. Incomplete trailing text stays in stdoutBuffer
// until its newline arrives.
child.stdout.on("data", chunk => {
  stdoutBuffer += chunk;
  let newline;
  while ((newline = stdoutBuffer.indexOf("\n")) >= 0) {
    const line = stdoutBuffer.slice(0, newline).trim();
    stdoutBuffer = stdoutBuffer.slice(newline + 1);
    if (!line) continue;

    let message;
    try {
      message = JSON.parse(line);
    } catch {
      // Deliberately skipped: any stdout line that is not JSON (for example a
      // stray log line) is not part of the protocol exchange.
      continue;
    }

    // Only responses to requests we issued are of interest; ids start at 1, so
    // a truthy id check is sufficient here.
    if (message.id && pending.has(message.id)) {
      const { resolve, reject } = pending.get(message.id);
      pending.delete(message.id);
      if (message.error) reject(new Error(JSON.stringify(message.error)));
      else resolve(message.result);
    }
  }
});
|
||||
|
||||
/**
 * Send a JSON-RPC 2.0 request to the server over stdin.
 *
 * @param {string} method - JSON-RPC method name.
 * @param {object} [params={}] - method parameters.
 * @returns {Promise<*>} settles when the stdout reader sees the response with
 *   the same id: resolves with its `result`, rejects with its `error`.
 */
function request(method, params = {}) {
  const id = nextId;
  nextId += 1;

  const payload = JSON.stringify({ jsonrpc: "2.0", id, method, params });
  child.stdin.write(payload + "\n");

  return new Promise((resolve, reject) => {
    pending.set(id, { resolve, reject });
  });
}
|
||||
|
||||
/**
 * Send a JSON-RPC 2.0 notification (a message without an id, so no response
 * is tracked) to the server over stdin.
 *
 * @param {string} method - JSON-RPC method name.
 * @param {object} [params={}] - method parameters.
 */
function notify(method, params = {}) {
  const notification = { jsonrpc: "2.0", method, params };
  child.stdin.write(JSON.stringify(notification) + "\n");
}
|
||||
|
||||
/**
 * Collect the text of a tool-call result.
 *
 * @param {{content?: Array<{type: string, text?: string}>}} result - tool
 *   result whose `content` parts are inspected.
 * @returns {string} the `text` of every part with `type === "text"`, joined
 *   with newlines; an empty string when there is no content.
 */
function textFrom(result) {
  const parts = result.content || [];
  // flatMap keeps a one-element array for text parts and drops everything else.
  const texts = parts.flatMap(part => (part.type === "text" ? [part.text] : []));
  return texts.join("\n");
}
|
||||
|
||||
/**
 * Invoke an MCP tool through a `tools/call` request.
 *
 * @param {string} name - tool name.
 * @param {object} [args={}] - tool arguments, sent as `arguments`.
 * @returns {Promise<*>} whatever `request` settles with for this call.
 */
async function callTool(name, args = {}) {
  const params = { name, arguments: args };
  return request("tools/call", params);
}
|
||||
|
||||
// Smoke sequence: MCP handshake, tool discovery, one probe per search provider
// and fetch mode, then the localhost guard. On success a JSON summary is
// printed; on any failure the error and the tail of the server's stderr are
// printed and the process exits with status 1.
try {
  const handshake = {
    protocolVersion: "2024-11-05",
    capabilities: {},
    clientInfo: { name: "context-kit-smoke", version: "0.0.0" }
  };
  await request("initialize", handshake);
  notify("notifications/initialized");

  // Both tools must be advertised before any of them is exercised.
  const listed = await request("tools/list");
  const toolNames = new Set((listed.tools || []).map(({ name }) => name));
  const missingTool = ["search_web", "fetch_url"].find(name => !toolNames.has(name));
  if (missingTool !== undefined) throw new Error(`missing tool: ${missingTool}`);

  // Fail with a truncated copy of the response when the marker text is absent.
  const expectText = (label, text, needle) => {
    if (text.includes(needle)) return;
    throw new Error(`${label} returned unexpected text: ${text.slice(0, 500)}`);
  };

  // Search probes, one explicit provider at a time.
  const searchProbes = [
    ["searxng", "SearXNG smoke"],
    ["bing", "Bing smoke"]
  ];
  for (const [provider, label] of searchProbes) {
    const hits = textFrom(await callTool("search_web", {
      q: "Model Context Protocol",
      limit: 2,
      provider
    }));
    expectText(label, hits, "Model");
  }

  const pageUrl = "https://example.com/";
  const maxBytes = 52428800;

  // Default fetch engine.
  const httpPage = textFrom(await callTool("fetch_url", {
    url: pageUrl,
    format: "markdown",
    max_download_bytes: maxBytes
  }));
  expectText("fetch smoke", httpPage, "Example Domain");

  // Same fetch with engine=browser requested.
  const browserPage = textFrom(await callTool("fetch_url", {
    url: pageUrl,
    format: "markdown",
    engine: "browser",
    max_download_bytes: maxBytes
  }));
  expectText("browser fetch smoke", browserPage, "Example Domain");

  // A loopback URL must come back as a tool error carrying the block message.
  const localResult = await callTool("fetch_url", {
    url: "http://127.0.0.1:1/",
    max_download_bytes: maxBytes
  });
  if (!localResult.isError || !textFrom(localResult).includes("Blocked localhost/private URL")) {
    throw new Error("localhost/private URL was not blocked as expected");
  }

  clearTimeout(timeout);
  await stopChild();

  const summary = {
    tools: [...toolNames].sort(),
    searxng: "pass",
    bing: "pass",
    fetch_url: "pass",
    fetch_url_browser_engine_currently_http: "pass",
    localhost_guard: "pass"
  };
  console.log(JSON.stringify(summary, null, 2));
} catch (error) {
  clearTimeout(timeout);
  await stopChild();
  console.error(error.message);
  if (stderrBuffer) console.error(stderrBuffer.slice(-4000));
  process.exit(1);
}
|
||||
@@ -5,12 +5,13 @@
|
||||
"type": "local",
|
||||
"command": ["context-kit", "web-search"],
|
||||
"enabled": true,
|
||||
"timeout": 60000
|
||||
"timeout": 150000
|
||||
},
|
||||
"context-docs": {
|
||||
"type": "remote",
|
||||
"url": "http://127.0.0.1:8776/mcp",
|
||||
"enabled": true
|
||||
"enabled": true,
|
||||
"timeout": 150000
|
||||
},
|
||||
"context-repomix": {
|
||||
"type": "local",
|
||||
|
||||
Reference in New Issue
Block a user