Run docs-mcp as a long-lived shared HTTP service (#1)

Co-authored-by: Ajay Krishnan <ajay@krishnan.ca>
Co-committed-by: Ajay Krishnan <ajay@krishnan.ca>
This commit was merged in pull request #1.
This commit is contained in:
2026-05-24 15:48:24 +00:00
committed by Ajay Krishnan
parent f7ab811d93
commit 6629a9b284
10 changed files with 210 additions and 63 deletions

View File

@@ -37,6 +37,10 @@ COMPOSE_FILE="${ROOT}/compose.yml"
# Host directory for context-kit state; override with CONTEXT_KIT_DATA_DIR.
DATA_DIR="${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}"
# Docker network name; defaults to "<PROJECT>_default"
# (presumably the Compose default network for the project — confirm).
NETWORK="${CONTEXT_KIT_DOCKER_NETWORK:-${PROJECT}_default}"
# Loopback port on which SearXNG is expected to answer.
SEARXNG_PORT="${CONTEXT_KIT_SEARXNG_PORT:-8099}"
# Loopback port used for the docs-mcp readiness/health probes
# (http://127.0.0.1:<port>/status) and exported to Compose.
DOCS_PORT="${CONTEXT_KIT_DOCS_PORT:-8776}"
# MCP endpoint URL printed by `status` and embedded in the install snippets.
# NOTE: overriding only this URL does not change DOCS_PORT, which the
# readiness/health probes keep using.
DOCS_HTTP_URL="${CONTEXT_KIT_DOCS_HTTP_URL:-http://127.0.0.1:${DOCS_PORT}/mcp}"
# Fixed name of the long-lived docs-mcp container; used for the `docker ps`
# running check and in the `docker logs` hint.
DOCS_CONTAINER_NAME="context-kit-docs-mcp"
# Generated file holding the resolved docs sources (rewritten by
# write_docs_sources_file).
DOCS_SOURCES_FILE="${DATA_DIR}/docs-sources.txt"
# Image references for the MCP servers; each can be overridden via its
# CONTEXT_KIT_*_IMAGE environment variable.
WEB_SEARCH_IMAGE="${CONTEXT_KIT_WEB_SEARCH_IMAGE:-context-kit/web-search-mcp:latest}"
DOCS_IMAGE="${CONTEXT_KIT_DOCS_IMAGE:-context-kit/docs-mcp:latest}"
@@ -47,18 +51,20 @@ usage() {
context-kit: local context tools for coding agents
Usage:
context-kit start Start SearXNG and ensure default images exist
context-kit stop Stop the SearXNG service
context-kit restart Restart SearXNG
context-kit start Start SearXNG + the long-lived docs-mcp service
context-kit stop Stop SearXNG + docs-mcp
context-kit restart Restart SearXNG + docs-mcp
context-kit build Build MCP images
context-kit status Show services, images, and configured docs sources
context-kit doctor Check Docker, services, images, and sources
context-kit status Show services, images, sources, and the docs HTTP endpoint
context-kit doctor Check Docker, services, images, sources, and HTTP endpoints
context-kit redaction-check Scan this repo for local paths and secret patterns
MCP server commands:
context-kit web-search Run the SearXNG-backed web-search MCP server
context-kit docs Run the local llms.txt docs MCP server
context-kit repomix Run Repomix MCP for the current project
context-kit web-search Per-call SearXNG-backed web-search MCP (stdio)
context-kit docs Stdio bridge to the long-lived docs-mcp service
(clients that speak HTTP MCP should connect
directly to the URL printed by `status`)
context-kit repomix Per-call Repomix MCP for the current project (stdio)
Assistant snippets:
context-kit install claude Print a project .mcp.json snippet using context-kit on PATH
@@ -71,10 +77,27 @@ USAGE
compose() {
  # Thin wrapper around `docker compose` for this project.
  #
  # Arguments: "$@" - forwarded verbatim to `docker compose`.
  # Globals:   DATA_DIR, SEARXNG_PORT, DOCS_PORT, PROJECT, COMPOSE_FILE (read).
  # Returns:   exit status of `docker compose`.
  #
  # Optional tuning knobs fall back to these defaults when unset or empty.
  local docs_ttl="${CONTEXT_KIT_DOCS_TTL:-24h}"
  local docs_max_bytes="${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}"
  local docs_model="${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}"
  local docs_preindex="${CONTEXT_KIT_DOCS_PREINDEX:-0}"
  # An explicit context-kit builder wins over an ambient BUILDX_BUILDER.
  local builder="${CONTEXT_KIT_BUILDX_BUILDER:-${BUILDX_BUILDER:-default}}"

  # The settings are passed as a per-command environment so they reach the
  # compose file without leaking into the rest of the script.
  CONTEXT_KIT_DATA_DIR="${DATA_DIR}" \
  CONTEXT_KIT_SEARXNG_PORT="${SEARXNG_PORT}" \
  CONTEXT_KIT_DOCS_PORT="${DOCS_PORT}" \
  CONTEXT_KIT_DOCS_UID="$(id -u)" \
  CONTEXT_KIT_DOCS_GID="$(id -g)" \
  CONTEXT_KIT_DOCS_TTL="${docs_ttl}" \
  CONTEXT_KIT_DOCS_MAX_GET_BYTES="${docs_max_bytes}" \
  CONTEXT_KIT_DOCS_EMBED_MODEL="${docs_model}" \
  CONTEXT_KIT_DOCS_PREINDEX="${docs_preindex}" \
  BUILDX_BUILDER="${builder}" \
    docker compose -p "${PROJECT}" -f "${COMPOSE_FILE}" "$@"
}
write_docs_sources_file() {
  # Regenerate DOCS_SOURCES_FILE from the currently resolved docs sources.
  #
  # The content is written to a sibling temp file and renamed into place so a
  # reader never sees a half-written file.
  #
  # Globals: DOCS_SOURCES_FILE (read).
  # Returns: 0 on success; 1 if the directory cannot be created or the
  #          sources cannot be resolved/written. On failure the previous
  #          sources file is left untouched and the temp file is removed.
  local target_dir tmp
  target_dir="$(dirname -- "${DOCS_SOURCES_FILE}")"
  mkdir -p -- "${target_dir}" || return 1

  tmp="${DOCS_SOURCES_FILE}.tmp.$$"
  if ! {
    printf '# generated by context-kit start; edit your CONTEXT_KIT_DOCS_SOURCES file(s) instead\n' \
      && resolved_sources
  } > "${tmp}"; then
    # Never install a truncated list over a good one, and do not leave the
    # temp file behind.
    rm -f -- "${tmp}"
    return 1
  fi

  if ! mv -- "${tmp}" "${DOCS_SOURCES_FILE}"; then
    rm -f -- "${tmp}"
    return 1
  fi
}
warn() {
  # Print all arguments, joined into one "warn: "-prefixed line, on stderr.
  printf '%s\n' "warn: $*" 1>&2
}
@@ -118,6 +141,21 @@ wait_for_searxng() {
warn "SearXNG did not become ready on 127.0.0.1:${SEARXNG_PORT} after 30s"
}
wait_for_docs_mcp() {
  # Poll the docs-mcp /status endpoint on loopback until it answers.
  #
  # Globals: DOCS_PORT, DOCS_CONTAINER_NAME (read).
  # Returns: 0 as soon as the endpoint responds, or immediately when curl is
  #          not installed (the probe is then skipped). After 180 failed
  #          probes a warning is emitted via warn.
  if ! command -v curl >/dev/null 2>&1; then
    return 0
  fi

  # First-run can take a while: model download + full preindex of every source.
  local probes_done=0
  while (( probes_done < 180 )); do
    if curl -fsS -o /dev/null "http://127.0.0.1:${DOCS_PORT}/status" 2>/dev/null; then
      return 0
    fi
    sleep 1
    probes_done=$(( probes_done + 1 ))
  done

  warn "docs-mcp did not become ready on 127.0.0.1:${DOCS_PORT} after 180s (check: docker logs ${DOCS_CONTAINER_NAME})"
}
abs_dir() {
local path="$1"
mkdir -p "${path}"
@@ -157,7 +195,10 @@ resolved_sources() {
cmd_build() {
  # Build the MCP images and pull the Repomix image.
  #
  # Arguments: none (any argument is a usage error reported via fail).
  # Globals:   REPOMIX_IMAGE (read).
  [[ "$#" -eq 0 ]] || fail "usage: context-kit build"
  require_docker
  # web-search-mcp is still profile-gated (built but not auto-started);
  # docs-mcp is a regular long-lived service so it builds without a profile.
  # Each image is built exactly once: docs-mcp is intentionally not part of
  # the profile-gated build.
  compose --profile mcp build web-search-mcp
  compose build docs-mcp
  docker pull "${REPOMIX_IMAGE}"
}
@@ -167,13 +208,15 @@ cmd_start() {
if ! docker image inspect "${WEB_SEARCH_IMAGE}" >/dev/null 2>&1 || ! docker image inspect "${DOCS_IMAGE}" >/dev/null 2>&1; then
cmd_build
fi
compose up -d searxng
write_docs_sources_file
compose up -d searxng docs-mcp
wait_for_searxng
wait_for_docs_mcp
}
cmd_stop() {
  # Stop the long-lived services (SearXNG and docs-mcp) in a single
  # compose invocation; containers are stopped, not removed.
  require_docker
  compose stop searxng docs-mcp
}
cmd_status() {
@@ -183,6 +226,7 @@ cmd_status() {
printf '\nImages\n'
docker image ls --format '{{.Repository}}:{{.Tag}}\t{{.Size}}' \
| grep -E '^(context-kit/|ghcr.io/yamadashy/repomix:)' || true
printf '\nDocs MCP endpoint\n- %s (container: %s)\n' "${DOCS_HTTP_URL}" "${DOCS_CONTAINER_NAME}"
printf '\nDocs sources\n'
resolved_sources | sed 's/^/- /'
printf '\nData directory\n- %s\n' "${DATA_DIR}"
@@ -230,6 +274,12 @@ cmd_doctor() {
printf 'warn SearXNG not responding on 127.0.0.1:%s\n' "${SEARXNG_PORT}"
fi
if command -v curl >/dev/null 2>&1 && curl -fsS -o /dev/null "http://127.0.0.1:${DOCS_PORT}/status" 2>/dev/null; then
printf 'pass docs-mcp HTTP responds on 127.0.0.1:%s\n' "${DOCS_PORT}"
else
printf 'warn docs-mcp HTTP not responding on 127.0.0.1:%s (run context-kit start)\n' "${DOCS_PORT}"
fi
if [[ "$(resolved_sources | wc -l | tr -d ' ')" -gt 0 ]]; then
printf 'pass docs sources resolve\n'
else
@@ -255,34 +305,24 @@ cmd_web_search() {
}
cmd_docs() {
# Prefer the `type: remote` MCP config pointing at ${DOCS_HTTP_URL}.
# This stdio entrypoint is kept for clients that cannot speak HTTP MCP:
# it spawns a thin mcp-proxy bridge per call but all calls multiplex onto
# the single long-lived docs-mcp container (no Chroma write contention).
#
# NOTE(review): as listed, this body appears to interleave the previous
# per-call implementation (store path, TTL, sources passed to a fresh
# container) with the new mcp-proxy bridge. Confirm which statements are
# meant to remain before relying on the comments below.
require_docker
require_image "${DOCS_IMAGE}" "context-kit build"
# Host-side data/model directories and tuning values consumed by the
# `docker run` arguments further down (volumes, --ttl, --max-get-bytes,
# --embed-model).
local docs_dir models_dir ttl max_get_bytes embed_model
docs_dir="$(abs_dir "${DATA_DIR}/docs")"
models_dir="$(abs_dir "${DATA_DIR}/models")"
ttl="${CONTEXT_KIT_DOCS_TTL:-7d}"
max_get_bytes="${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}"
embed_model="${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}"
# Collect the configured docs sources, one per output line of
# resolved_sources, and refuse to continue when there are none.
local sources=() source
while IFS= read -r source; do
sources+=("${source}")
done < <(resolved_sources)
[[ "${#sources[@]}" -gt 0 ]] || fail "no docs sources configured"
# The bridge needs the long-lived container: require an exact-name match
# among running containers, otherwise point the user at `context-kit start`.
if ! docker ps --filter "name=^${DOCS_CONTAINER_NAME}$" --filter "status=running" --format '{{.Names}}' | grep -qx "${DOCS_CONTAINER_NAME}"; then
fail "long-lived docs-mcp not running; start it with: context-kit start"
fi
# exec replaces this shell with the container process, so stdin/stdout of
# the caller are wired straight to it (-i keeps stdin open).
exec docker run --rm -i \
--label dev.context-kit=true \
--user "$(id -u):$(id -g)" \
-e HOME=/tmp \
-e USER=context-kit \
-e LOGNAME=context-kit \
-e TORCHINDUCTOR_CACHE_DIR=/tmp/torchinductor \
-v "${docs_dir}:/data" \
-v "${models_dir}:/models" \
--network host \
--entrypoint mcp-proxy \
"${DOCS_IMAGE}" \
--store-path /data \
--ttl "${ttl}" \
--max-get-bytes "${max_get_bytes}" \
--embed-model "${embed_model}" \
"${sources[@]}"
# NOTE(review): the command above ends on the sources line (no trailing
# backslash), so the two lines below form a separate command that is never
# reached once exec succeeds. They look like the intended mcp-proxy
# arguments (transport + ${DOCS_HTTP_URL}) — confirm and merge.
--transport streamablehttp \
"${DOCS_HTTP_URL}"
}
cmd_repomix() {
@@ -308,8 +348,9 @@ snippet_command() {
}
print_opencode() {
local bin
local bin url
bin="$(json_escape "$(snippet_command "${1:-}")")"
url="$(json_escape "${DOCS_HTTP_URL}")"
cat <<JSON
{
"\$schema": "https://opencode.ai/config.json",
@@ -321,10 +362,9 @@ print_opencode() {
"timeout": 60000
},
"context-docs": {
"type": "local",
"command": ["${bin}", "docs"],
"enabled": true,
"timeout": 120000
"type": "remote",
"url": "${url}",
"enabled": true
},
"context-repomix": {
"type": "local",
@@ -338,8 +378,9 @@ JSON
}
print_claude() {
local bin
local bin url
bin="$(json_escape "$(snippet_command "${1:-}")")"
url="$(json_escape "${DOCS_HTTP_URL}")"
cat <<JSON
{
"mcpServers": {
@@ -348,8 +389,8 @@ print_claude() {
"args": ["web-search"]
},
"context-docs": {
"command": "${bin}",
"args": ["docs"]
"type": "http",
"url": "${url}"
},
"context-repomix": {
"command": "${bin}",