From 99881b608bf219c1f5509f49e705e699e8d8ea98 Mon Sep 17 00:00:00 2001 From: Ajay Krishnan Date: Thu, 25 Jun 2026 07:05:08 -0700 Subject: [PATCH] Strengthen release verification gates --- bin/context-kit | 56 ++++++++++-- compose.yml | 8 +- docker/docs/entrypoint.sh | 14 ++- docs/configuration.md | 5 +- scripts/release-check | 30 ++++++- scripts/smoke-docs.mjs | 170 +++++++++++++++++++++++++++++++++++ scripts/smoke-web-search.mjs | 35 +++++++- 7 files changed, 298 insertions(+), 20 deletions(-) create mode 100644 scripts/smoke-docs.mjs diff --git a/bin/context-kit b/bin/context-kit index 2f23532..84eb481 100755 --- a/bin/context-kit +++ b/bin/context-kit @@ -42,9 +42,10 @@ fail() { load_env_file +DEFAULT_DATA_DIR="${HOME:-${PWD}}/.local/share/context-kit" PROJECT="${CONTEXT_KIT_COMPOSE_PROJECT:-context-kit}" COMPOSE_FILE="${ROOT}/compose.yml" -DATA_DIR="${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}" +DATA_DIR="${CONTEXT_KIT_DATA_DIR:-${DEFAULT_DATA_DIR}}" NETWORK="${CONTEXT_KIT_DOCKER_NETWORK:-${PROJECT}_default}" SEARXNG_PORT="${CONTEXT_KIT_SEARXNG_PORT:-8099}" DOCS_PORT="${CONTEXT_KIT_DOCS_PORT:-8776}" @@ -190,6 +191,17 @@ warn() { printf 'warn: %s\n' "$*" >&2 } +print_relative_paths() { + local path + while IFS= read -r path; do + [[ -n "${path}" ]] || continue + if [[ "${path}" == "${ROOT}/"* ]]; then + path="${path#"${ROOT}/"}" + fi + printf '%s\n' "${path}" + done +} + json_escape() { local s="$1" s="${s//\\/\\\\}" @@ -314,8 +326,9 @@ cmd_status() { printf '\nImages\n' docker image ls --format '{{.Repository}}:{{.Tag}}\t{{.Size}}' \ | grep -E '^(context-kit/|ghcr.io/yamadashy/repomix:)' || true - printf '\nLabeled containers\n' - docker ps -a --filter label=dev.context-kit=true --format 'table {{.Names}}\t{{.Status}}\t{{.Image}}' + printf '\nActive per-call MCP containers\n' + docker ps -a --filter label=dev.context-kit=true --format '{{.Names}}\t{{.Status}}\t{{.Image}}\t{{.Command}}' \ + | awk 'BEGIN { print "NAMES\tSTATUS\tIMAGE\tCOMMAND" } $1 !~ /^context-kit-(docs-mcp|searxng-1)$/ { print }' printf '\nDocs MCP endpoint\n- %s (container: %s)\n' "${DOCS_HTTP_URL}" "${DOCS_CONTAINER_NAME}" printf '\nDocs sources\n' resolved_sources | sed 's/^/- /' @@ -387,13 +400,15 @@ cmd_doctor() { if command -v curl >/dev/null 2>&1 && curl -fsS "http://127.0.0.1:${SEARXNG_PORT}/healthz" >/dev/null 2>&1; then printf 'pass SearXNG responds on 127.0.0.1:%s\n' "${SEARXNG_PORT}" else - printf 'warn SearXNG not responding on 127.0.0.1:%s\n' "${SEARXNG_PORT}" + printf 'fail SearXNG not responding on 127.0.0.1:%s\n' "${SEARXNG_PORT}" + ok=1 fi if command -v curl >/dev/null 2>&1 && curl -fsS -o /dev/null "http://127.0.0.1:${DOCS_PORT}/status" 2>/dev/null; then printf 'pass docs-mcp HTTP responds on 127.0.0.1:%s\n' "${DOCS_PORT}" else - printf 'warn docs-mcp HTTP not responding on 127.0.0.1:%s (run context-kit start)\n' "${DOCS_PORT}" + printf 'fail docs-mcp HTTP not responding on 127.0.0.1:%s (run context-kit start)\n' "${DOCS_PORT}" + ok=1 fi if [[ "$(resolved_sources | wc -l | tr -d ' ')" -gt 0 ]]; then @@ -409,8 +424,13 @@ cmd_web_search() { require_docker require_network require_image "${WEB_SEARCH_IMAGE}" "context-kit build" + local cidfile_args=() + if [[ -n "${CONTEXT_KIT_DOCKER_CIDFILE:-}" ]]; then + cidfile_args=(--cidfile "${CONTEXT_KIT_DOCKER_CIDFILE}") + fi exec docker run --rm -i \ --label dev.context-kit=true \ + "${cidfile_args[@]}" \ --network "${NETWORK}" \ -e DEFAULT_SEARCH_PROVIDER="${WEB_SEARCH_PROVIDER}" \ -e SEARXNG_URL="${SEARXNG_URL:-http://searxng:8080}" \ @@ -438,8 +458,13 @@ cmd_docs() { fi local bridge_url="http://${DOCS_CONTAINER_NAME}:8000/mcp" + local cidfile_args=() + if [[ -n "${CONTEXT_KIT_DOCKER_CIDFILE:-}" ]]; then + cidfile_args=(--cidfile "${CONTEXT_KIT_DOCKER_CIDFILE}") + fi exec docker run --rm -i \ --label dev.context-kit=true \ + "${cidfile_args[@]}" \ --network "${NETWORK}" \ --entrypoint mcp-proxy \ "${DOCS_IMAGE}" \ @@ -454,8 +479,13 @@ cmd_repomix() { dir="$(project_dir)" mount_dir="${CONTEXT_KIT_REPOMIX_MOUNT_DIR:-${dir}}" mount_dir="$(cd "${mount_dir}" && pwd -P)" + local cidfile_args=() + if [[ -n "${CONTEXT_KIT_DOCKER_CIDFILE:-}" ]]; then + cidfile_args=(--cidfile "${CONTEXT_KIT_DOCKER_CIDFILE}") + fi exec docker run --rm -i \ --label dev.context-kit=true \ + "${cidfile_args[@]}" \ -v "${mount_dir}:${mount_dir}:ro" \ --workdir "${dir}" \ "${REPOMIX_IMAGE}" --mcp @@ -536,6 +566,10 @@ cmd_install() { cmd_redaction_check() { local bad=0 + local scan_paths=("${ROOT}") + if [[ "$#" -gt 0 ]]; then + scan_paths=("$@") + fi local local_path_terms='/(home|Users)/[^/[:space:]]+|/data/(projects|opencode-mcp)[^[:space:]]*|[A-Za-z]:\\Users\\[^\\[:space:]]+' local secret_terms='AKIA[0-9A-Z]{16}|BEGIN (RSA |OPENSSH |EC |DSA )?PRIVATE KEY|xox[baprs]-|sk-[A-Za-z0-9_-]{20,}|ghp_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}|glpat-[A-Za-z0-9_-]{20,}|gitea_[A-Za-z0-9_-]{20,}' @@ -551,10 +585,18 @@ cmd_redaction_check() { --exclude=*.log ) - if grep "${grep_opts[@]}" "${local_path_terms}" "${ROOT}"; then + local matches + matches="$(grep "${grep_opts[@]}" --files-with-matches "${local_path_terms}" "${scan_paths[@]}" 2>/dev/null || true)" + if [[ -n "${matches}" ]]; then + printf 'fail redaction-check found local path patterns in:\n' >&2 + printf '%s\n' "${matches}" | print_relative_paths | sed 's/^/- /' >&2 bad=1 fi - if grep "${grep_opts[@]}" "${secret_terms}" "${ROOT}"; then + + matches="$(grep "${grep_opts[@]}" --files-with-matches "${secret_terms}" "${scan_paths[@]}" 2>/dev/null || true)" + if [[ -n "${matches}" ]]; then + printf 'fail redaction-check found secret-like patterns in:\n' >&2 + printf '%s\n' "${matches}" | print_relative_paths | sed 's/^/- /' >&2 bad=1 fi diff --git a/compose.yml b/compose.yml index 2afa332..6f6a80a 100644 --- a/compose.yml +++ b/compose.yml @@ -62,10 +62,10 @@ services: # refresh on demand. Set CONTEXT_KIT_DOCS_PREINDEX=1 to restore eager. DOCS_MCP_PREINDEX: "${CONTEXT_KIT_DOCS_PREINDEX:-0}" volumes: - - ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs:/data - - ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/models:/models - - ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs-sources.txt:/etc/context-kit/docs-sources.txt:ro - - ${CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR:-${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/local-sources}:/etc/context-kit/local-sources:ro + - ${CONTEXT_KIT_DATA_DIR:-${HOME:?Set HOME or CONTEXT_KIT_DATA_DIR, or run bin/context-kit instead}/.local/share/context-kit}/docs:/data + - ${CONTEXT_KIT_DATA_DIR:-${HOME:?Set HOME or CONTEXT_KIT_DATA_DIR, or run bin/context-kit instead}/.local/share/context-kit}/models:/models + - ${CONTEXT_KIT_DATA_DIR:-${HOME:?Set HOME or CONTEXT_KIT_DATA_DIR, or run bin/context-kit instead}/.local/share/context-kit}/docs-sources.txt:/etc/context-kit/docs-sources.txt:ro + - ${CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR:-${CONTEXT_KIT_DATA_DIR:-${HOME:?Set HOME or CONTEXT_KIT_DATA_DIR, or run bin/context-kit instead}/.local/share/context-kit}/local-sources}:/etc/context-kit/local-sources:ro healthcheck: test: ["CMD-SHELL", "python -c \"import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://127.0.0.1:8000/status', timeout=2).status < 500 else 1)\""] interval: 30s diff --git a/docker/docs/entrypoint.sh b/docker/docs/entrypoint.sh index 4cba799..f398d5c 100644 --- a/docker/docs/entrypoint.sh +++ b/docker/docs/entrypoint.sh @@ -21,14 +21,24 @@ if [ ! -r "$sources_file" ]; then exit 64 fi -# Strip comments and blank lines, then collapse whitespace into a flat list. -sources=$(grep -vE '^[[:space:]]*(#|$)' "$sources_file" | tr -s '[:space:]' '\n' | grep -v '^$' || true) +# Strip inline comments and blank lines, then collapse whitespace into a flat list. +sources=$(sed 's/#.*//' "$sources_file" | tr -s '[:space:]' '\n' | grep -v '^$' || true) if [ -z "$sources" ]; then echo "docs-mcp: no sources found in $sources_file after stripping comments/blanks" >&2 exit 64 fi +for source_url in $sources; do + case "$source_url" in + */llms.txt|*/llms-full.txt) ;; + *) + echo "docs-mcp: source URL must end with /llms.txt or /llms-full.txt: $source_url" >&2 + exit 64 + ;; + esac +done + if [ -d "$local_sources_dir" ]; then python -m http.server "$local_sources_port" \ --bind 127.0.0.1 \ diff --git a/docs/configuration.md b/docs/configuration.md index c01fd66..e958036 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -78,5 +78,6 @@ Each source file is plain text. Blank lines and `#` comments are ignored. Entries may be absolute source-profile paths for private machine-local config. For local llms.txt files, place content under `CONTEXT_KIT_DOCS_LOCAL_SOURCES_DIR` and reference it as -`http://127.0.0.1:8769/path/inside/local-sources.txt`; that loopback URL is -inside the docs-mcp container, not exposed on the host. +`http://127.0.0.1:8769/path/inside/local-sources/llms.txt` or another URL that +ends in `/llms.txt` or `/llms-full.txt`; that loopback URL is inside the docs-mcp +container, not exposed on the host. diff --git a/scripts/release-check b/scripts/release-check index 0aa2e1c..66b0b53 100755 --- a/scripts/release-check +++ b/scripts/release-check @@ -17,17 +17,34 @@ check_node() { done } -git diff --check +assert_redaction_check_does_not_disclose_matches() { + local fixture="${tmp_dir}/redaction-fixture.txt" + local output="${tmp_dir}/redaction-output.txt" + local blocked_path="/data/proj""ects/context-kit-private-fixture" + printf 'blocked=%s\n' "${blocked_path}" > "${fixture}" + if bin/context-kit redaction-check "${fixture}" >"${output}" 2>&1; then + printf 'redaction-check test unexpectedly passed\n' >&2 + return 1 + fi + if grep -F "${blocked_path}" "${output}" >/dev/null; then + printf 'redaction-check disclosed matched content\n' >&2 + return 1 + fi +} + +git diff --check HEAD +git show --check --format= HEAD >/dev/null git ls-files --cached --error-unmatch \ docker/web-search/patch-mcp-web-search.mjs \ docker/web-search/overrides/bing.js \ docker/docs/constraints.txt \ scripts/smoke-web-search.mjs \ + scripts/smoke-docs.mjs \ scripts/release-check >/dev/null bash -n bin/context-kit bash -n scripts/release-check sh -n docker/docs/entrypoint.sh -check_node docker/web-search/patch-mcp-web-search.mjs docker/web-search/overrides/bing.js scripts/smoke-web-search.mjs +check_node docker/web-search/patch-mcp-web-search.mjs docker/web-search/overrides/bing.js scripts/smoke-web-search.mjs scripts/smoke-docs.mjs node -e 'const fs=require("node:fs"); JSON.parse(fs.readFileSync("snippets/opencode.json", "utf8")); JSON.parse(fs.readFileSync("snippets/claude.mcp.json", "utf8"));' bin/context-kit install opencode > "${tmp_dir}/opencode.json" @@ -39,11 +56,20 @@ node -e 'const fs=require("node:fs"); for (const file of process.argv.slice(1)) "${tmp_dir}/opencode-absolute.json" \ "${tmp_dir}/claude.json" \ "${tmp_dir}/claude-absolute.json" +bin/context-kit redaction-check "${tmp_dir}/opencode.json" "${tmp_dir}/claude.json" +assert_redaction_check_does_not_disclose_matches bin/context-kit redaction-check docker compose -p context-kit -f compose.yml config >/dev/null +if env -u HOME docker compose --env-file /dev/null -p context-kit-release-home-check -f compose.yml config >"${tmp_dir}/compose-no-home.out" 2>"${tmp_dir}/compose-no-home.err"; then + printf 'compose config unexpectedly succeeded without HOME or CONTEXT_KIT_DATA_DIR\n' >&2 + exit 1 +fi +CONTEXT_KIT_DATA_DIR="${tmp_dir}/compose-data" env -u HOME docker compose --env-file /dev/null -p context-kit-release-home-check -f compose.yml config >/dev/null bin/context-kit build +bin/context-kit restart bin/context-kit doctor node scripts/smoke-web-search.mjs bin/context-kit web-search +node scripts/smoke-docs.mjs bin/context-kit docs printf 'pass release-check\n' diff --git a/scripts/smoke-docs.mjs b/scripts/smoke-docs.mjs new file mode 100644 index 0000000..3ddc4d0 --- /dev/null +++ b/scripts/smoke-docs.mjs @@ -0,0 +1,170 @@ +import { spawn, spawnSync } from "node:child_process"; +import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +const command = process.argv[2]; +const args = process.argv.slice(3); + +if (!command) { + throw new Error("usage: node scripts/smoke-docs.mjs [args...]"); +} + +const tmpDir = mkdtempSync(join(tmpdir(), "context-kit-docs-smoke-")); +const cidFile = join(tmpDir, "container.cid"); + +const child = spawn(command, args, { + cwd: new URL("..", import.meta.url).pathname, + env: { ...process.env, CONTEXT_KIT_DOCKER_CIDFILE: cidFile }, + stdio: ["pipe", "pipe", "pipe"] +}); + +let nextId = 1; +const pending = new Map(); +let stdoutBuffer = ""; +let stderrBuffer = ""; +let childExited = false; + +child.once("exit", (code, signal) => { + childExited = true; + if (pending.size > 0) { + const error = new Error(`MCP child exited before responding (code=${code}, signal=${signal}). stderr: ${stderrBuffer.slice(-2000)}`); + for (const { reject } of pending.values()) reject(error); + pending.clear(); + } +}); + +function stopChild() { + return new Promise(resolve => { + if (childExited) { + stopContainer(); + rmSync(tmpDir, { recursive: true, force: true }); + resolve(); + return; + } + + child.stdin.end(); + const stopTimer = setTimeout(() => { + stopContainer(); + }, 1000); + const termTimer = setTimeout(() => { + if (!childExited) child.kill("SIGTERM"); + }, 3000); + const killTimer = setTimeout(() => { + if (!childExited) child.kill("SIGKILL"); + }, 6000); + + child.once("exit", () => { + stopContainer(); + clearTimeout(stopTimer); + clearTimeout(termTimer); + clearTimeout(killTimer); + rmSync(tmpDir, { recursive: true, force: true }); + resolve(); + }); + }); +} + +function stopContainer() { + if (!existsSync(cidFile)) return; + const containerId = readFileSync(cidFile, "utf8").trim(); + if (!containerId) return; + spawnSync("docker", ["stop", containerId], { stdio: "ignore" }); +} + +const timeout = setTimeout(async () => { + await stopChild(); + console.error(`Docs MCP smoke timed out. stderr: ${stderrBuffer.slice(-2000)}`); + process.exit(1); +}, 300000); + +child.stderr.on("data", chunk => { + stderrBuffer += chunk.toString(); +}); + +child.stdout.on("data", chunk => { + stdoutBuffer += chunk.toString(); + let newline; + while ((newline = stdoutBuffer.indexOf("\n")) >= 0) { + const line = stdoutBuffer.slice(0, newline).trim(); + stdoutBuffer = stdoutBuffer.slice(newline + 1); + if (!line) continue; + let message; + try { + message = JSON.parse(line); + } catch { + continue; + } + if (message.id && pending.has(message.id)) { + const { resolve, reject } = pending.get(message.id); + pending.delete(message.id); + if (message.error) reject(new Error(JSON.stringify(message.error))); + else resolve(message.result); + } + } +}); + +function request(method, params = {}) { + if (childExited) { + return Promise.reject(new Error(`MCP child already exited. stderr: ${stderrBuffer.slice(-2000)}`)); + } + const id = nextId++; + child.stdin.write(`${JSON.stringify({ jsonrpc: "2.0", id, method, params })}\n`); + return new Promise((resolve, reject) => pending.set(id, { resolve, reject })); +} + +function notify(method, params = {}) { + child.stdin.write(`${JSON.stringify({ jsonrpc: "2.0", method, params })}\n`); +} + +async function callTool(name, args = {}) { + return request("tools/call", { name, arguments: args }); +} + +try { + await request("initialize", { + protocolVersion: "2024-11-05", + capabilities: {}, + clientInfo: { name: "context-kit-docs-smoke", version: "0.0.0" } + }); + notify("notifications/initialized"); + + const listed = await request("tools/list"); + const toolNames = new Set((listed.tools || []).map(tool => tool.name)); + for (const name of ["docs_query", "docs_sources"]) { + if (!toolNames.has(name)) throw new Error(`missing tool: ${name}`); + } + + const sources = await callTool("docs_sources"); + const sourcesText = JSON.stringify(sources); + if (sources.isError) { + throw new Error(`docs_sources returned an error: ${sourcesText.slice(0, 500)}`); + } + + const query = await callTool("docs_query", { + query: "Model Context Protocol documentation", + limit: 3, + auto_retrieve: true, + auto_retrieve_threshold: 0.1, + auto_retrieve_limit: 1, + max_bytes: 12000 + }); + const queryText = JSON.stringify(query); + if (!queryText.includes("search_results") && !queryText.includes("Model Context Protocol")) { + throw new Error(`docs_query returned unexpected payload: ${queryText.slice(0, 500)}`); + } + + clearTimeout(timeout); + await stopChild(); + console.log(JSON.stringify({ + tools: Array.from(toolNames).sort(), + docs_sources: "pass", + docs_query: "pass" + }, null, 2)); +} catch (error) { + clearTimeout(timeout); + await stopChild(); + console.error(error.message); + if (stderrBuffer) console.error(stderrBuffer.slice(-4000)); + process.exit(1); +} diff --git a/scripts/smoke-web-search.mjs b/scripts/smoke-web-search.mjs index b9f5c11..d4e3bc9 100644 --- a/scripts/smoke-web-search.mjs +++ b/scripts/smoke-web-search.mjs @@ -1,4 +1,7 @@ -import { spawn } from "node:child_process"; +import { spawn, spawnSync } from "node:child_process"; +import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; const command = process.argv[2]; const args = process.argv.slice(3); @@ -7,9 +10,12 @@ if (!command) { throw new Error("usage: node scripts/smoke-web-search.mjs [args...]"); } +const tmpDir = mkdtempSync(join(tmpdir(), "context-kit-web-search-smoke-")); +const cidFile = join(tmpDir, "container.cid"); + const child = spawn(command, args, { cwd: new URL("..", import.meta.url).pathname, - env: process.env, + env: { ...process.env, CONTEXT_KIT_DOCKER_CIDFILE: cidFile }, stdio: ["pipe", "pipe", "pipe"] }); @@ -19,18 +25,28 @@ let stdoutBuffer = ""; let stderrBuffer = ""; let childExited = false; -child.once("exit", () => { +child.once("exit", (code, signal) => { childExited = true; + if (pending.size > 0) { + const error = new Error(`MCP child exited before responding (code=${code}, signal=${signal}). stderr: ${stderrBuffer.slice(-2000)}`); + for (const { reject } of pending.values()) reject(error); + pending.clear(); + } }); function stopChild() { return new Promise(resolve => { if (childExited) { + stopContainer(); + rmSync(tmpDir, { recursive: true, force: true }); resolve(); return; } child.stdin.end(); + const stopTimer = setTimeout(() => { + stopContainer(); + }, 1000); const termTimer = setTimeout(() => { if (!childExited) child.kill("SIGTERM"); }, 3000); @@ -39,13 +55,23 @@ function stopChild() { }, 6000); child.once("exit", () => { + stopContainer(); + clearTimeout(stopTimer); clearTimeout(termTimer); clearTimeout(killTimer); + rmSync(tmpDir, { recursive: true, force: true }); resolve(); }); }); } +function stopContainer() { + if (!existsSync(cidFile)) return; + const containerId = readFileSync(cidFile, "utf8").trim(); + if (!containerId) return; + spawnSync("docker", ["stop", containerId], { stdio: "ignore" }); +} + const timeout = setTimeout(async () => { await stopChild(); console.error(`MCP smoke timed out. stderr: ${stderrBuffer.slice(-2000)}`); @@ -79,6 +105,9 @@ child.stdout.on("data", chunk => { }); function request(method, params = {}) { + if (childExited) { + return Promise.reject(new Error(`MCP child already exited. stderr: ${stderrBuffer.slice(-2000)}`)); + } const id = nextId++; child.stdin.write(`${JSON.stringify({ jsonrpc: "2.0", id, method, params })}\n`); return new Promise((resolve, reject) => pending.set(id, { resolve, reject }));