Run docs-mcp as a long-lived shared HTTP service #1

Merged
ajaynomics merged 1 commit from feat/docs-mcp-shared-http into main 2026-05-24 15:48:25 +00:00
10 changed files with 210 additions and 63 deletions
Showing only changes of commit b4e863562c - Show all commits

View File

@@ -14,10 +14,19 @@ CONTEXT_KIT_SEARXNG_PORT=8099
# beyond localhost, which the default setup does not do.
CONTEXT_KIT_SEARXNG_SECRET=change-me-local-only
# Long-lived context-docs HTTP MCP service. Bound to 127.0.0.1 only.
CONTEXT_KIT_DOCS_PORT=8776
# Override only if you proxy the service behind another hostname or path.
# CONTEXT_KIT_DOCS_HTTP_URL=http://127.0.0.1:8776/mcp
# Docs indexing defaults.
CONTEXT_KIT_DOCS_TTL=7d
CONTEXT_KIT_DOCS_TTL=24h
CONTEXT_KIT_DOCS_MAX_GET_BYTES=75000
CONTEXT_KIT_DOCS_EMBED_MODEL=BAAI/bge-small-en-v1.5
# Eagerly index every source on container start. Off by default so startup is
# fast; call the docs_refresh MCP tool when you want to populate the index.
# CONTEXT_KIT_DOCS_PREINDEX=1
# One or more source files, separated by spaces.
CONTEXT_KIT_DOCS_SOURCES=config/sources.default.txt

View File

@@ -56,8 +56,12 @@ config that will not be committed.
## Defaults
- SearXNG binds to `127.0.0.1:8099` only.
- `context-docs` runs as a long-lived service on `127.0.0.1:8776` (Streamable
HTTP MCP) so every client shares one indexer and one Chroma writer. The
`bin/context-kit docs` stdio command is kept as a compatibility shim for
clients that cannot speak HTTP MCP.
- Docs and model caches live in `$HOME/.local/share/context-kit`.
- Docs refresh TTL defaults to `7d`.
- Docs refresh TTL defaults to `24h`.
- MCP containers are labeled `dev.context-kit=true` for safe inspection and cleanup.
- Repomix mounts only the current project read-only, not your whole home directory.
- No code-editing MCP server is enabled by default.

View File

@@ -37,6 +37,10 @@ COMPOSE_FILE="${ROOT}/compose.yml"
# Host directory for persistent state; override with CONTEXT_KIT_DATA_DIR.
DATA_DIR="${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}"
# Docker network name; defaults to "<PROJECT>_default" unless
# CONTEXT_KIT_DOCKER_NETWORK is set.
NETWORK="${CONTEXT_KIT_DOCKER_NETWORK:-${PROJECT}_default}"
# Host port for SearXNG (default 8099).
SEARXNG_PORT="${CONTEXT_KIT_SEARXNG_PORT:-8099}"
# Host port for the long-lived docs-mcp HTTP service (default 8776).
DOCS_PORT="${CONTEXT_KIT_DOCS_PORT:-8776}"
# MCP endpoint URL printed by `status`, emitted into install snippets, and used
# by the stdio bridge. Derived from DOCS_PORT unless CONTEXT_KIT_DOCS_HTTP_URL
# overrides it.
DOCS_HTTP_URL="${CONTEXT_KIT_DOCS_HTTP_URL:-http://127.0.0.1:${DOCS_PORT}/mcp}"
# Fixed container name used for `docker ps` / `docker logs` lookups.
# NOTE(review): must match container_name in compose.yml.
DOCS_CONTAINER_NAME="context-kit-docs-mcp"
# Generated flat list of docs sources (see write_docs_sources_file).
DOCS_SOURCES_FILE="${DATA_DIR}/docs-sources.txt"
# Image tags for the MCP servers; each can be overridden via its env var.
WEB_SEARCH_IMAGE="${CONTEXT_KIT_WEB_SEARCH_IMAGE:-context-kit/web-search-mcp:latest}"
DOCS_IMAGE="${CONTEXT_KIT_DOCS_IMAGE:-context-kit/docs-mcp:latest}"
@@ -47,18 +51,20 @@ usage() {
context-kit: local context tools for coding agents
Usage:
context-kit start Start SearXNG and ensure default images exist
context-kit stop Stop the SearXNG service
context-kit restart Restart SearXNG
context-kit start Start SearXNG + the long-lived docs-mcp service
context-kit stop Stop SearXNG + docs-mcp
context-kit restart Restart SearXNG + docs-mcp
context-kit build Build MCP images
context-kit status Show services, images, and configured docs sources
context-kit doctor Check Docker, services, images, and sources
context-kit status Show services, images, sources, and the docs HTTP endpoint
context-kit doctor Check Docker, services, images, sources, and HTTP endpoints
context-kit redaction-check Scan this repo for local paths and secret patterns
MCP server commands:
context-kit web-search Run the SearXNG-backed web-search MCP server
context-kit docs Run the local llms.txt docs MCP server
context-kit repomix Run Repomix MCP for the current project
context-kit web-search Per-call SearXNG-backed web-search MCP (stdio)
context-kit docs Stdio bridge to the long-lived docs-mcp service
(clients that speak HTTP MCP should connect
directly to the URL printed by `status`)
context-kit repomix Per-call Repomix MCP for the current project (stdio)
Assistant snippets:
context-kit install claude Print a project .mcp.json snippet using context-kit on PATH
@@ -71,10 +77,27 @@ USAGE
# Run `docker compose` for this project with the environment compose.yml
# interpolates. All arguments are forwarded unchanged to `docker compose`.
# Globals (read): DATA_DIR, SEARXNG_PORT, DOCS_PORT, PROJECT, COMPOSE_FILE,
#                 plus the optional CONTEXT_KIT_DOCS_* / BUILDX overrides.
compose() {
  # Collected as KEY=VALUE words so the variables reach only this one
  # docker invocation and never leak into the calling shell.
  local -a compose_env=(
    "CONTEXT_KIT_DATA_DIR=${DATA_DIR}"
    "CONTEXT_KIT_SEARXNG_PORT=${SEARXNG_PORT}"
    "CONTEXT_KIT_DOCS_PORT=${DOCS_PORT}"
    "CONTEXT_KIT_DOCS_UID=$(id -u)"
    "CONTEXT_KIT_DOCS_GID=$(id -g)"
    "CONTEXT_KIT_DOCS_TTL=${CONTEXT_KIT_DOCS_TTL:-24h}"
    "CONTEXT_KIT_DOCS_MAX_GET_BYTES=${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}"
    "CONTEXT_KIT_DOCS_EMBED_MODEL=${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}"
    "CONTEXT_KIT_DOCS_PREINDEX=${CONTEXT_KIT_DOCS_PREINDEX:-0}"
    "BUILDX_BUILDER=${CONTEXT_KIT_BUILDX_BUILDER:-${BUILDX_BUILDER:-default}}"
  )
  env "${compose_env[@]}" \
    docker compose -p "${PROJECT}" -f "${COMPOSE_FILE}" "$@"
}
# Regenerate DOCS_SOURCES_FILE from resolved_sources.
# The list is written to a sibling temp file and renamed into place, so a
# reader never sees a half-written file. If generation fails, the temp file is
# removed and the previous DOCS_SOURCES_FILE is left untouched.
# Globals:  DOCS_SOURCES_FILE (read)
# Returns:  0 on success; the failing status of the generator otherwise.
write_docs_sources_file() {
  local tmp status=0
  mkdir -p "$(dirname "${DOCS_SOURCES_FILE}")"
  tmp="${DOCS_SOURCES_FILE}.tmp.$$"
  {
    printf '# generated by context-kit start; edit your CONTEXT_KIT_DOCS_SOURCES file(s) instead\n'
    resolved_sources
  } > "${tmp}" || status=$?
  if [[ "${status}" -ne 0 ]]; then
    # Do not publish a partial list and do not leave the temp file behind.
    rm -f -- "${tmp}"
    return "${status}"
  fi
  mv -- "${tmp}" "${DOCS_SOURCES_FILE}"
}
# Emit a "warn: <message>" line on stderr; all arguments are joined into one
# message.
warn() {
  >&2 printf '%s\n' "warn: $*"
}
@@ -118,6 +141,21 @@ wait_for_searxng() {
warn "SearXNG did not become ready on 127.0.0.1:${SEARXNG_PORT} after 30s"
}
# Poll the docs-mcp /status endpoint until it answers, or warn and give up.
# Globals:   DOCS_PORT, DOCS_CONTAINER_NAME (read)
# Arguments: $1 - optional number of one-second probe rounds (default 180)
# Outputs:   a warning on stderr (via warn) when the service never answers
# Returns:   0 when the endpoint responds or curl is not installed; otherwise
#            whatever warn returns.
wait_for_docs_mcp() {
  # Without curl there is nothing to probe with; treat as best-effort success.
  command -v curl >/dev/null 2>&1 || return 0
  # First run can take a while (e.g. the embedding model download), hence the
  # generous default.
  local max_wait="${1:-180}" attempt
  for ((attempt = 1; attempt <= max_wait; attempt++)); do
    # --max-time bounds each probe so a connection that is accepted but never
    # answered cannot stall the loop indefinitely.
    if curl -fsS --max-time 2 -o /dev/null "http://127.0.0.1:${DOCS_PORT}/status" 2>/dev/null; then
      return 0
    fi
    sleep 1
  done
  warn "docs-mcp did not become ready on 127.0.0.1:${DOCS_PORT} after ${max_wait}s (check: docker logs ${DOCS_CONTAINER_NAME})"
}
abs_dir() {
local path="$1"
mkdir -p "${path}"
@@ -157,7 +195,10 @@ resolved_sources() {
# `context-kit build`: build the MCP images and pull the Repomix image.
# Takes no arguments; fails with a usage message if any are given.
# Globals: REPOMIX_IMAGE (read)
cmd_build() {
  [[ "$#" -eq 0 ]] || fail "usage: context-kit build"
  require_docker
  # web-search-mcp is still profile-gated (built but not auto-started);
  # docs-mcp is a regular long-lived service so it builds without a profile.
  # Each image is built exactly once.
  compose --profile mcp build web-search-mcp
  compose build docs-mcp
  docker pull "${REPOMIX_IMAGE}"
}
@@ -167,13 +208,15 @@ cmd_start() {
if ! docker image inspect "${WEB_SEARCH_IMAGE}" >/dev/null 2>&1 || ! docker image inspect "${DOCS_IMAGE}" >/dev/null 2>&1; then
cmd_build
fi
compose up -d searxng
write_docs_sources_file
compose up -d searxng docs-mcp
wait_for_searxng
wait_for_docs_mcp
}
# `context-kit stop`: stop SearXNG and the long-lived docs-mcp service with a
# single compose invocation.
cmd_stop() {
  require_docker
  compose stop searxng docs-mcp
}
cmd_status() {
@@ -183,6 +226,7 @@ cmd_status() {
printf '\nImages\n'
docker image ls --format '{{.Repository}}:{{.Tag}}\t{{.Size}}' \
| grep -E '^(context-kit/|ghcr.io/yamadashy/repomix:)' || true
printf '\nDocs MCP endpoint\n- %s (container: %s)\n' "${DOCS_HTTP_URL}" "${DOCS_CONTAINER_NAME}"
printf '\nDocs sources\n'
resolved_sources | sed 's/^/- /'
printf '\nData directory\n- %s\n' "${DATA_DIR}"
@@ -230,6 +274,12 @@ cmd_doctor() {
printf 'warn SearXNG not responding on 127.0.0.1:%s\n' "${SEARXNG_PORT}"
fi
if command -v curl >/dev/null 2>&1 && curl -fsS -o /dev/null "http://127.0.0.1:${DOCS_PORT}/status" 2>/dev/null; then
printf 'pass docs-mcp HTTP responds on 127.0.0.1:%s\n' "${DOCS_PORT}"
else
printf 'warn docs-mcp HTTP not responding on 127.0.0.1:%s (run context-kit start)\n' "${DOCS_PORT}"
fi
if [[ "$(resolved_sources | wc -l | tr -d ' ')" -gt 0 ]]; then
printf 'pass docs sources resolve\n'
else
@@ -255,34 +305,24 @@ cmd_web_search() {
}
cmd_docs() {
# Prefer the `type: remote` MCP config pointing at ${DOCS_HTTP_URL}.
# This stdio entrypoint is kept for clients that cannot speak HTTP MCP:
# it spawns a thin mcp-proxy bridge per call but all calls multiplex onto
# the single long-lived docs-mcp container (no Chroma write contention).
require_docker
require_image "${DOCS_IMAGE}" "context-kit build"
local docs_dir models_dir ttl max_get_bytes embed_model
docs_dir="$(abs_dir "${DATA_DIR}/docs")"
models_dir="$(abs_dir "${DATA_DIR}/models")"
ttl="${CONTEXT_KIT_DOCS_TTL:-7d}"
max_get_bytes="${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}"
embed_model="${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}"
local sources=() source
while IFS= read -r source; do
sources+=("${source}")
done < <(resolved_sources)
[[ "${#sources[@]}" -gt 0 ]] || fail "no docs sources configured"
if ! docker ps --filter "name=^${DOCS_CONTAINER_NAME}$" --filter "status=running" --format '{{.Names}}' | grep -qx "${DOCS_CONTAINER_NAME}"; then
fail "long-lived docs-mcp not running; start it with: context-kit start"
fi
exec docker run --rm -i \
--label dev.context-kit=true \
--user "$(id -u):$(id -g)" \
-e HOME=/tmp \
-e USER=context-kit \
-e LOGNAME=context-kit \
-e TORCHINDUCTOR_CACHE_DIR=/tmp/torchinductor \
-v "${docs_dir}:/data" \
-v "${models_dir}:/models" \
--network host \
--entrypoint mcp-proxy \
"${DOCS_IMAGE}" \
--store-path /data \
--ttl "${ttl}" \
--max-get-bytes "${max_get_bytes}" \
--embed-model "${embed_model}" \
"${sources[@]}"
--transport streamablehttp \
"${DOCS_HTTP_URL}"
}
cmd_repomix() {
@@ -308,8 +348,9 @@ snippet_command() {
}
print_opencode() {
local bin
local bin url
bin="$(json_escape "$(snippet_command "${1:-}")")"
url="$(json_escape "${DOCS_HTTP_URL}")"
cat <<JSON
{
"\$schema": "https://opencode.ai/config.json",
@@ -321,10 +362,9 @@ print_opencode() {
"timeout": 60000
},
"context-docs": {
"type": "local",
"command": ["${bin}", "docs"],
"enabled": true,
"timeout": 120000
"type": "remote",
"url": "${url}",
"enabled": true
},
"context-repomix": {
"type": "local",
@@ -338,8 +378,9 @@ JSON
}
print_claude() {
local bin
local bin url
bin="$(json_escape "$(snippet_command "${1:-}")")"
url="$(json_escape "${DOCS_HTTP_URL}")"
cat <<JSON
{
"mcpServers": {
@@ -348,8 +389,8 @@ print_claude() {
"args": ["web-search"]
},
"context-docs": {
"command": "${bin}",
"args": ["docs"]
"type": "http",
"url": "${url}"
},
"context-repomix": {
"command": "${bin}",

View File

@@ -36,12 +36,35 @@ services:
build:
context: ./docker/docs
image: context-kit/docs-mcp:latest
profiles: ["mcp"]
stdin_open: true
tty: false
# Long-lived shared docs MCP. One container = one Chroma writer; clients
# connect over Streamable HTTP (mcp-proxy bridges llms-txt-mcp's stdio).
container_name: context-kit-docs-mcp
restart: unless-stopped
ports:
- "127.0.0.1:${CONTEXT_KIT_DOCS_PORT:-8776}:8000"
# Run as the host user so bind-mounted /data isn't owned by root.
user: "${CONTEXT_KIT_DOCS_UID:-1000}:${CONTEXT_KIT_DOCS_GID:-1000}"
environment:
HOME: /tmp
USER: context-kit
LOGNAME: context-kit
TORCHINDUCTOR_CACHE_DIR: /tmp/torchinductor
DOCS_MCP_TTL: "${CONTEXT_KIT_DOCS_TTL:-24h}"
DOCS_MCP_MAX_GET_BYTES: "${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}"
DOCS_MCP_EMBED_MODEL: "${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}"
# Preindex on startup is off by default; use the docs_refresh tool to
# refresh on demand. Set CONTEXT_KIT_DOCS_PREINDEX=1 to restore eager.
DOCS_MCP_PREINDEX: "${CONTEXT_KIT_DOCS_PREINDEX:-0}"
volumes:
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs:/data
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/models:/models
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs-sources.txt:/etc/context-kit/docs-sources.txt:ro
healthcheck:
test: ["CMD-SHELL", "python -c \"import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://127.0.0.1:8000/status', timeout=2).status < 500 else 1)\""]
interval: 30s
timeout: 5s
retries: 3
start_period: 60s
labels:
dev.context-kit: "true"

View File

@@ -1,2 +1,3 @@
*
!Dockerfile
!entrypoint.sh

View File

@@ -1,6 +1,7 @@
FROM python:3.12-slim
ARG LLMS_TXT_MCP_VERSION=0.2.0
ARG MCP_PROXY_VERSION=0.12.0
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
@@ -12,16 +13,27 @@ RUN pip install --no-cache-dir \
--index-url https://download.pytorch.org/whl/cpu \
torch
# llms-txt-mcp does the indexing/search; mcp-proxy fronts its stdio transport
# as Streamable HTTP so multiple MCP clients can share one long-lived process
# (and therefore one Chroma DB writer).
RUN if [ -n "${LLMS_TXT_MCP_VERSION}" ]; then \
pip install --no-cache-dir "llms-txt-mcp==${LLMS_TXT_MCP_VERSION}"; \
else \
pip install --no-cache-dir llms-txt-mcp; \
fi
fi \
&& pip install --no-cache-dir "mcp-proxy==${MCP_PROXY_VERSION}"
RUN mkdir -p /data /models
COPY entrypoint.sh /usr/local/bin/docs-mcp-entrypoint
RUN chmod +x /usr/local/bin/docs-mcp-entrypoint
RUN mkdir -p /data /models /etc/context-kit
ENV HF_HOME=/models \
SENTENCE_TRANSFORMERS_HOME=/models
SENTENCE_TRANSFORMERS_HOME=/models \
DOCS_MCP_HTTP_HOST=0.0.0.0 \
DOCS_MCP_HTTP_PORT=8000 \
DOCS_MCP_SOURCES_FILE=/etc/context-kit/docs-sources.txt
VOLUME ["/data", "/models"]
EXPOSE 8000
ENTRYPOINT ["llms-txt-mcp"]
ENTRYPOINT ["/usr/local/bin/docs-mcp-entrypoint"]

53
docker/docs/entrypoint.sh Normal file
View File

@@ -0,0 +1,53 @@
#!/bin/sh
# context-kit docs-mcp entrypoint.
#
# Bridges llms-txt-mcp (stdio-only) to Streamable HTTP via mcp-proxy so that
# multiple clients share a single long-lived indexer instead of each spawning
# their own container (and racing on the same Chroma store).
#
# Sources are read from $DOCS_MCP_SOURCES_FILE (one URL per line; `#` comments
# and blank lines are allowed). Everything else is configured via env vars
# with sensible defaults so this image works standalone too:
#   DOCS_MCP_HTTP_HOST / DOCS_MCP_HTTP_PORT  listen address (0.0.0.0:8000)
#   DOCS_MCP_ALLOW_ORIGIN                    value for --allow-origin (*)
#   DOCS_MCP_TTL                             re-fetch cadence (24h)
#   DOCS_MCP_MAX_GET_BYTES                   retrieval size cap (75000)
#   DOCS_MCP_EMBED_MODEL                     embedding model name
#   DOCS_MCP_PREINDEX                        1 = index every source on start
#
# Exits 64 when the sources file is unreadable or contains no sources.
set -eu

sources_file="${DOCS_MCP_SOURCES_FILE:-/etc/context-kit/docs-sources.txt}"
if [ ! -r "$sources_file" ]; then
  echo "docs-mcp: sources file not readable: $sources_file" >&2
  echo "docs-mcp: set DOCS_MCP_SOURCES_FILE or mount one at that path." >&2
  exit 64
fi

# Strip comments and blank lines, then collapse whitespace into a flat,
# newline-separated list. `|| true` keeps `set -e` from aborting when grep
# matches nothing; the empty result is reported just below.
sources=$(grep -vE '^[[:space:]]*(#|$)' "$sources_file" | tr -s '[:space:]' '\n' | grep -v '^$' || true)
if [ -z "$sources" ]; then
  echo "docs-mcp: no sources found in $sources_file after stripping comments/blanks" >&2
  exit 64
fi

# By default llms-txt-mcp 0.2.0 re-embeds every source on launch (the actual
# default is a background preindex; --no-preindex only disables the foreground
# variant). On a long-lived container that just wastes ~5 min of CPU per
# restart, so we disable BOTH and let the caller use `docs_refresh` on demand.
# Set DOCS_MCP_PREINDEX=1 to restore the eager behavior.
preindex_flag="--no-preindex --no-background-preindex"
if [ "${DOCS_MCP_PREINDEX:-0}" = "1" ]; then
  preindex_flag=""
fi

# Turn the two whitespace-separated lists into positional parameters. Pathname
# expansion is disabled first so a source URL containing `?`, `*` or `[` is
# passed through literally instead of being matched against files in the
# working directory; only word-splitting is wanted here.
set -f
# shellcheck disable=SC2086 # intentional word-splitting on $preindex_flag / $sources
set -- $preindex_flag $sources

# NOTE(review): --allow-origin defaults to `*`; set DOCS_MCP_ALLOW_ORIGIN to
# narrow it if browser origins should not reach this endpoint.
exec mcp-proxy \
  --host "${DOCS_MCP_HTTP_HOST:-0.0.0.0}" \
  --port "${DOCS_MCP_HTTP_PORT:-8000}" \
  --pass-environment \
  --allow-origin "${DOCS_MCP_ALLOW_ORIGIN:-*}" \
  -- \
  llms-txt-mcp \
  --store-path /data \
  --ttl "${DOCS_MCP_TTL:-24h}" \
  --max-get-bytes "${DOCS_MCP_MAX_GET_BYTES:-75000}" \
  --embed-model "${DOCS_MCP_EMBED_MODEL:-BAAI/bge-small-en-v1.5}" \
  "$@"

View File

@@ -14,29 +14,34 @@ shell code.
| `CONTEXT_KIT_DATA_DIR` | `$HOME/.local/share/context-kit` | Persistent docs indexes and model cache |
| `CONTEXT_KIT_COMPOSE_PROJECT` | `context-kit` | Docker Compose project and network prefix |
| `CONTEXT_KIT_SEARXNG_PORT` | `8099` | Localhost SearXNG port |
| `CONTEXT_KIT_DOCS_TTL` | `7d` | Docs re-fetch cadence |
| `CONTEXT_KIT_DOCS_PORT` | `8776` | Localhost port for the long-lived docs-mcp HTTP service |
| `CONTEXT_KIT_DOCS_HTTP_URL` | `http://127.0.0.1:${CONTEXT_KIT_DOCS_PORT}/mcp` | URL emitted into install snippets and used by the stdio bridge |
| `CONTEXT_KIT_DOCS_TTL` | `24h` | Docs re-fetch cadence |
| `CONTEXT_KIT_DOCS_SOURCES` | `config/sources.default.txt` | Space-separated source profile files |
| `CONTEXT_KIT_DOCS_MAX_GET_BYTES` | `75000` | Max bytes returned by docs retrieval |
| `CONTEXT_KIT_DOCS_EMBED_MODEL` | `BAAI/bge-small-en-v1.5` | SentenceTransformers embedding model |
| `CONTEXT_KIT_DOCS_PREINDEX` | `0` | Set to `1` to re-embed every source on container start |
## TTL Guidance
`7d` is the default because most reference docs do not need daily re-embedding.
`24h` is the default. Most reference docs do not need re-embedding more often,
and the shared service does not re-fetch sources until the TTL elapses.
Use shorter TTLs for fast-moving APIs:
```sh
CONTEXT_KIT_DOCS_TTL=72h bin/context-kit docs
CONTEXT_KIT_DOCS_TTL=6h bin/context-kit restart
```
Use longer TTLs for stable specs:
```sh
CONTEXT_KIT_DOCS_TTL=30d bin/context-kit docs
CONTEXT_KIT_DOCS_TTL=30d bin/context-kit restart
```
When freshness matters for one task, prefer a manual refresh through the docs
MCP tool instead of lowering the global TTL for every session.
The docs-mcp container reads `CONTEXT_KIT_DOCS_TTL` at startup, so changes
require `bin/context-kit restart`. When freshness matters for one task, prefer
calling the `docs_refresh` MCP tool instead of lowering the global TTL.
## Source Profiles

View File

@@ -5,8 +5,8 @@
"args": ["web-search"]
},
"context-docs": {
"command": "context-kit",
"args": ["docs"]
"type": "http",
"url": "http://127.0.0.1:8776/mcp"
},
"context-repomix": {
"command": "context-kit",

View File

@@ -8,10 +8,9 @@
"timeout": 60000
},
"context-docs": {
"type": "local",
"command": ["context-kit", "docs"],
"enabled": true,
"timeout": 120000
"type": "remote",
"url": "http://127.0.0.1:8776/mcp",
"enabled": true
},
"context-repomix": {
"type": "local",