Run docs-mcp as a long-lived shared HTTP service (#1)
Co-authored-by: Ajay Krishnan <ajay@krishnan.ca> Co-committed-by: Ajay Krishnan <ajay@krishnan.ca>
This commit was merged in pull request #1.
This commit is contained in:
11
.env.example
11
.env.example
@@ -14,10 +14,19 @@ CONTEXT_KIT_SEARXNG_PORT=8099
|
|||||||
# beyond localhost, which the default setup does not do.
|
# beyond localhost, which the default setup does not do.
|
||||||
CONTEXT_KIT_SEARXNG_SECRET=change-me-local-only
|
CONTEXT_KIT_SEARXNG_SECRET=change-me-local-only
|
||||||
|
|
||||||
|
# Long-lived context-docs HTTP MCP service. Bound to 127.0.0.1 only.
|
||||||
|
CONTEXT_KIT_DOCS_PORT=8776
|
||||||
|
# Override only if you proxy the service behind another hostname or path.
|
||||||
|
# CONTEXT_KIT_DOCS_HTTP_URL=http://127.0.0.1:8776/mcp
|
||||||
|
|
||||||
# Docs indexing defaults.
|
# Docs indexing defaults.
|
||||||
CONTEXT_KIT_DOCS_TTL=7d
|
CONTEXT_KIT_DOCS_TTL=24h
|
||||||
CONTEXT_KIT_DOCS_MAX_GET_BYTES=75000
|
CONTEXT_KIT_DOCS_MAX_GET_BYTES=75000
|
||||||
CONTEXT_KIT_DOCS_EMBED_MODEL=BAAI/bge-small-en-v1.5
|
CONTEXT_KIT_DOCS_EMBED_MODEL=BAAI/bge-small-en-v1.5
|
||||||
|
|
||||||
|
# Eagerly index every source on container start. Off by default so startup is
|
||||||
|
# fast; call the docs_refresh MCP tool when you want to populate the index.
|
||||||
|
# CONTEXT_KIT_DOCS_PREINDEX=1
|
||||||
|
|
||||||
# One or more source files, separated by spaces.
|
# One or more source files, separated by spaces.
|
||||||
CONTEXT_KIT_DOCS_SOURCES=config/sources.default.txt
|
CONTEXT_KIT_DOCS_SOURCES=config/sources.default.txt
|
||||||
|
|||||||
@@ -56,8 +56,12 @@ config that will not be committed.
|
|||||||
## Defaults
|
## Defaults
|
||||||
|
|
||||||
- SearXNG binds to `127.0.0.1:8099` only.
|
- SearXNG binds to `127.0.0.1:8099` only.
|
||||||
|
- `context-docs` runs as a long-lived service on `127.0.0.1:8776` (Streamable
|
||||||
|
HTTP MCP) so every client shares one indexer and one Chroma writer. The
|
||||||
|
`bin/context-kit docs` stdio command is kept as a compatibility shim for
|
||||||
|
clients that cannot speak HTTP MCP.
|
||||||
- Docs and model caches live in `$HOME/.local/share/context-kit`.
|
- Docs and model caches live in `$HOME/.local/share/context-kit`.
|
||||||
- Docs refresh TTL defaults to `7d`.
|
- Docs refresh TTL defaults to `24h`.
|
||||||
- MCP containers are labeled `dev.context-kit=true` for safe inspection and cleanup.
|
- MCP containers are labeled `dev.context-kit=true` for safe inspection and cleanup.
|
||||||
- Repomix mounts only the current project read-only, not your whole home directory.
|
- Repomix mounts only the current project read-only, not your whole home directory.
|
||||||
- No code-editing MCP server is enabled by default.
|
- No code-editing MCP server is enabled by default.
|
||||||
|
|||||||
125
bin/context-kit
125
bin/context-kit
@@ -37,6 +37,10 @@ COMPOSE_FILE="${ROOT}/compose.yml"
|
|||||||
DATA_DIR="${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}"
|
DATA_DIR="${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}"
|
||||||
NETWORK="${CONTEXT_KIT_DOCKER_NETWORK:-${PROJECT}_default}"
|
NETWORK="${CONTEXT_KIT_DOCKER_NETWORK:-${PROJECT}_default}"
|
||||||
SEARXNG_PORT="${CONTEXT_KIT_SEARXNG_PORT:-8099}"
|
SEARXNG_PORT="${CONTEXT_KIT_SEARXNG_PORT:-8099}"
|
||||||
|
DOCS_PORT="${CONTEXT_KIT_DOCS_PORT:-8776}"
|
||||||
|
DOCS_HTTP_URL="${CONTEXT_KIT_DOCS_HTTP_URL:-http://127.0.0.1:${DOCS_PORT}/mcp}"
|
||||||
|
DOCS_CONTAINER_NAME="context-kit-docs-mcp"
|
||||||
|
DOCS_SOURCES_FILE="${DATA_DIR}/docs-sources.txt"
|
||||||
|
|
||||||
WEB_SEARCH_IMAGE="${CONTEXT_KIT_WEB_SEARCH_IMAGE:-context-kit/web-search-mcp:latest}"
|
WEB_SEARCH_IMAGE="${CONTEXT_KIT_WEB_SEARCH_IMAGE:-context-kit/web-search-mcp:latest}"
|
||||||
DOCS_IMAGE="${CONTEXT_KIT_DOCS_IMAGE:-context-kit/docs-mcp:latest}"
|
DOCS_IMAGE="${CONTEXT_KIT_DOCS_IMAGE:-context-kit/docs-mcp:latest}"
|
||||||
@@ -47,18 +51,20 @@ usage() {
|
|||||||
context-kit: local context tools for coding agents
|
context-kit: local context tools for coding agents
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
context-kit start Start SearXNG and ensure default images exist
|
context-kit start Start SearXNG + the long-lived docs-mcp service
|
||||||
context-kit stop Stop the SearXNG service
|
context-kit stop Stop SearXNG + docs-mcp
|
||||||
context-kit restart Restart SearXNG
|
context-kit restart Restart SearXNG + docs-mcp
|
||||||
context-kit build Build MCP images
|
context-kit build Build MCP images
|
||||||
context-kit status Show services, images, and configured docs sources
|
context-kit status Show services, images, sources, and the docs HTTP endpoint
|
||||||
context-kit doctor Check Docker, services, images, and sources
|
context-kit doctor Check Docker, services, images, sources, and HTTP endpoints
|
||||||
context-kit redaction-check Scan this repo for local paths and secret patterns
|
context-kit redaction-check Scan this repo for local paths and secret patterns
|
||||||
|
|
||||||
MCP server commands:
|
MCP server commands:
|
||||||
context-kit web-search Run the SearXNG-backed web-search MCP server
|
context-kit web-search Per-call SearXNG-backed web-search MCP (stdio)
|
||||||
context-kit docs Run the local llms.txt docs MCP server
|
context-kit docs Stdio bridge to the long-lived docs-mcp service
|
||||||
context-kit repomix Run Repomix MCP for the current project
|
(clients that speak HTTP MCP should connect
|
||||||
|
directly to the URL printed by `status`)
|
||||||
|
context-kit repomix Per-call Repomix MCP for the current project (stdio)
|
||||||
|
|
||||||
Assistant snippets:
|
Assistant snippets:
|
||||||
context-kit install claude Print a project .mcp.json snippet using context-kit on PATH
|
context-kit install claude Print a project .mcp.json snippet using context-kit on PATH
|
||||||
@@ -71,10 +77,27 @@ USAGE
|
|||||||
compose() {
|
compose() {
|
||||||
CONTEXT_KIT_DATA_DIR="${DATA_DIR}" \
|
CONTEXT_KIT_DATA_DIR="${DATA_DIR}" \
|
||||||
CONTEXT_KIT_SEARXNG_PORT="${SEARXNG_PORT}" \
|
CONTEXT_KIT_SEARXNG_PORT="${SEARXNG_PORT}" \
|
||||||
|
CONTEXT_KIT_DOCS_PORT="${DOCS_PORT}" \
|
||||||
|
CONTEXT_KIT_DOCS_UID="$(id -u)" \
|
||||||
|
CONTEXT_KIT_DOCS_GID="$(id -g)" \
|
||||||
|
CONTEXT_KIT_DOCS_TTL="${CONTEXT_KIT_DOCS_TTL:-24h}" \
|
||||||
|
CONTEXT_KIT_DOCS_MAX_GET_BYTES="${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}" \
|
||||||
|
CONTEXT_KIT_DOCS_EMBED_MODEL="${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}" \
|
||||||
|
CONTEXT_KIT_DOCS_PREINDEX="${CONTEXT_KIT_DOCS_PREINDEX:-0}" \
|
||||||
BUILDX_BUILDER="${CONTEXT_KIT_BUILDX_BUILDER:-${BUILDX_BUILDER:-default}}" \
|
BUILDX_BUILDER="${CONTEXT_KIT_BUILDX_BUILDER:-${BUILDX_BUILDER:-default}}" \
|
||||||
docker compose -p "${PROJECT}" -f "${COMPOSE_FILE}" "$@"
|
docker compose -p "${PROJECT}" -f "${COMPOSE_FILE}" "$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Regenerate ${DOCS_SOURCES_FILE} from the output of resolved_sources, preceded
# by a "generated by" header line.
#
# The content is written to a sibling temp file and renamed into place so a
# reader never observes a half-written list.
write_docs_sources_file() {
  mkdir -p "$(dirname "${DOCS_SOURCES_FILE}")"

  # Docker creates a missing bind-mount source as a directory. If that happened
  # to this path, the `mv` below would move the temp file *into* the directory
  # instead of replacing it, so remove the (empty) directory or stop loudly.
  if [[ -d "${DOCS_SOURCES_FILE}" ]]; then
    rmdir "${DOCS_SOURCES_FILE}" 2>/dev/null \
      || fail "${DOCS_SOURCES_FILE} is a directory; remove it and run: context-kit start"
  fi

  # $$ (this shell's PID) keeps concurrent invocations from sharing a temp file.
  local tmp="${DOCS_SOURCES_FILE}.tmp.$$"
  {
    printf '# generated by context-kit start; edit your CONTEXT_KIT_DOCS_SOURCES file(s) instead\n'
    resolved_sources
  } > "${tmp}"
  mv "${tmp}" "${DOCS_SOURCES_FILE}"
}
|
||||||
|
|
||||||
warn() {
|
warn() {
|
||||||
printf 'warn: %s\n' "$*" >&2
|
printf 'warn: %s\n' "$*" >&2
|
||||||
}
|
}
|
||||||
@@ -118,6 +141,21 @@ wait_for_searxng() {
|
|||||||
warn "SearXNG did not become ready on 127.0.0.1:${SEARXNG_PORT} after 30s"
|
warn "SearXNG did not become ready on 127.0.0.1:${SEARXNG_PORT} after 30s"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Poll the docs-mcp /status endpoint on 127.0.0.1:${DOCS_PORT} once per second
# until it answers successfully. Returns 0 as soon as a probe succeeds; after
# the last attempt it only emits a warning. Returns 0 immediately when curl is
# not installed, since there is nothing to probe with.
wait_for_docs_mcp() {
  command -v curl >/dev/null 2>&1 || return 0

  # First run can take a while (model download, plus a full preindex of every
  # source when CONTEXT_KIT_DOCS_PREINDEX=1), hence the generous budget.
  local attempts=180 attempt
  for ((attempt = 1; attempt <= attempts; attempt++)); do
    # --max-time bounds each probe so one hung connection cannot stall the loop.
    if curl -fsS --max-time 2 -o /dev/null "http://127.0.0.1:${DOCS_PORT}/status" 2>/dev/null; then
      return 0
    fi
    sleep 1
  done

  warn "docs-mcp did not become ready on 127.0.0.1:${DOCS_PORT} after ${attempts}s (check: docker logs ${DOCS_CONTAINER_NAME})"
}
|
||||||
|
|
||||||
abs_dir() {
|
abs_dir() {
|
||||||
local path="$1"
|
local path="$1"
|
||||||
mkdir -p "${path}"
|
mkdir -p "${path}"
|
||||||
@@ -157,7 +195,10 @@ resolved_sources() {
|
|||||||
cmd_build() {
|
cmd_build() {
|
||||||
[[ "$#" -eq 0 ]] || fail "usage: context-kit build"
|
[[ "$#" -eq 0 ]] || fail "usage: context-kit build"
|
||||||
require_docker
|
require_docker
|
||||||
compose --profile mcp build web-search-mcp docs-mcp
|
# web-search-mcp is still profile-gated (built but not auto-started);
|
||||||
|
# docs-mcp is a regular long-lived service so it builds without a profile.
|
||||||
|
compose --profile mcp build web-search-mcp
|
||||||
|
compose build docs-mcp
|
||||||
docker pull "${REPOMIX_IMAGE}"
|
docker pull "${REPOMIX_IMAGE}"
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -167,13 +208,15 @@ cmd_start() {
|
|||||||
if ! docker image inspect "${WEB_SEARCH_IMAGE}" >/dev/null 2>&1 || ! docker image inspect "${DOCS_IMAGE}" >/dev/null 2>&1; then
|
if ! docker image inspect "${WEB_SEARCH_IMAGE}" >/dev/null 2>&1 || ! docker image inspect "${DOCS_IMAGE}" >/dev/null 2>&1; then
|
||||||
cmd_build
|
cmd_build
|
||||||
fi
|
fi
|
||||||
compose up -d searxng
|
write_docs_sources_file
|
||||||
|
compose up -d searxng docs-mcp
|
||||||
wait_for_searxng
|
wait_for_searxng
|
||||||
|
wait_for_docs_mcp
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd_stop() {
|
cmd_stop() {
|
||||||
require_docker
|
require_docker
|
||||||
compose stop searxng
|
compose stop searxng docs-mcp
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd_status() {
|
cmd_status() {
|
||||||
@@ -183,6 +226,7 @@ cmd_status() {
|
|||||||
printf '\nImages\n'
|
printf '\nImages\n'
|
||||||
docker image ls --format '{{.Repository}}:{{.Tag}}\t{{.Size}}' \
|
docker image ls --format '{{.Repository}}:{{.Tag}}\t{{.Size}}' \
|
||||||
| grep -E '^(context-kit/|ghcr.io/yamadashy/repomix:)' || true
|
| grep -E '^(context-kit/|ghcr.io/yamadashy/repomix:)' || true
|
||||||
|
printf '\nDocs MCP endpoint\n- %s (container: %s)\n' "${DOCS_HTTP_URL}" "${DOCS_CONTAINER_NAME}"
|
||||||
printf '\nDocs sources\n'
|
printf '\nDocs sources\n'
|
||||||
resolved_sources | sed 's/^/- /'
|
resolved_sources | sed 's/^/- /'
|
||||||
printf '\nData directory\n- %s\n' "${DATA_DIR}"
|
printf '\nData directory\n- %s\n' "${DATA_DIR}"
|
||||||
@@ -230,6 +274,12 @@ cmd_doctor() {
|
|||||||
printf 'warn SearXNG not responding on 127.0.0.1:%s\n' "${SEARXNG_PORT}"
|
printf 'warn SearXNG not responding on 127.0.0.1:%s\n' "${SEARXNG_PORT}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if command -v curl >/dev/null 2>&1 && curl -fsS -o /dev/null "http://127.0.0.1:${DOCS_PORT}/status" 2>/dev/null; then
|
||||||
|
printf 'pass docs-mcp HTTP responds on 127.0.0.1:%s\n' "${DOCS_PORT}"
|
||||||
|
else
|
||||||
|
printf 'warn docs-mcp HTTP not responding on 127.0.0.1:%s (run context-kit start)\n' "${DOCS_PORT}"
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ "$(resolved_sources | wc -l | tr -d ' ')" -gt 0 ]]; then
|
if [[ "$(resolved_sources | wc -l | tr -d ' ')" -gt 0 ]]; then
|
||||||
printf 'pass docs sources resolve\n'
|
printf 'pass docs sources resolve\n'
|
||||||
else
|
else
|
||||||
@@ -255,34 +305,24 @@ cmd_web_search() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
cmd_docs() {
|
cmd_docs() {
|
||||||
|
# Prefer the `type: remote` MCP config pointing at ${DOCS_HTTP_URL}.
|
||||||
|
# This stdio entrypoint is kept for clients that cannot speak HTTP MCP:
|
||||||
|
# it spawns a thin mcp-proxy bridge per call but all calls multiplex onto
|
||||||
|
# the single long-lived docs-mcp container (no Chroma write contention).
|
||||||
require_docker
|
require_docker
|
||||||
require_image "${DOCS_IMAGE}" "context-kit build"
|
require_image "${DOCS_IMAGE}" "context-kit build"
|
||||||
local docs_dir models_dir ttl max_get_bytes embed_model
|
|
||||||
docs_dir="$(abs_dir "${DATA_DIR}/docs")"
|
if ! docker ps --filter "name=^${DOCS_CONTAINER_NAME}$" --filter "status=running" --format '{{.Names}}' | grep -qx "${DOCS_CONTAINER_NAME}"; then
|
||||||
models_dir="$(abs_dir "${DATA_DIR}/models")"
|
fail "long-lived docs-mcp not running; start it with: context-kit start"
|
||||||
ttl="${CONTEXT_KIT_DOCS_TTL:-7d}"
|
fi
|
||||||
max_get_bytes="${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}"
|
|
||||||
embed_model="${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}"
|
|
||||||
local sources=() source
|
|
||||||
while IFS= read -r source; do
|
|
||||||
sources+=("${source}")
|
|
||||||
done < <(resolved_sources)
|
|
||||||
[[ "${#sources[@]}" -gt 0 ]] || fail "no docs sources configured"
|
|
||||||
exec docker run --rm -i \
|
exec docker run --rm -i \
|
||||||
--label dev.context-kit=true \
|
--label dev.context-kit=true \
|
||||||
--user "$(id -u):$(id -g)" \
|
--network host \
|
||||||
-e HOME=/tmp \
|
--entrypoint mcp-proxy \
|
||||||
-e USER=context-kit \
|
|
||||||
-e LOGNAME=context-kit \
|
|
||||||
-e TORCHINDUCTOR_CACHE_DIR=/tmp/torchinductor \
|
|
||||||
-v "${docs_dir}:/data" \
|
|
||||||
-v "${models_dir}:/models" \
|
|
||||||
"${DOCS_IMAGE}" \
|
"${DOCS_IMAGE}" \
|
||||||
--store-path /data \
|
--transport streamablehttp \
|
||||||
--ttl "${ttl}" \
|
"${DOCS_HTTP_URL}"
|
||||||
--max-get-bytes "${max_get_bytes}" \
|
|
||||||
--embed-model "${embed_model}" \
|
|
||||||
"${sources[@]}"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd_repomix() {
|
cmd_repomix() {
|
||||||
@@ -308,8 +348,9 @@ snippet_command() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
print_opencode() {
|
print_opencode() {
|
||||||
local bin
|
local bin url
|
||||||
bin="$(json_escape "$(snippet_command "${1:-}")")"
|
bin="$(json_escape "$(snippet_command "${1:-}")")"
|
||||||
|
url="$(json_escape "${DOCS_HTTP_URL}")"
|
||||||
cat <<JSON
|
cat <<JSON
|
||||||
{
|
{
|
||||||
"\$schema": "https://opencode.ai/config.json",
|
"\$schema": "https://opencode.ai/config.json",
|
||||||
@@ -321,10 +362,9 @@ print_opencode() {
|
|||||||
"timeout": 60000
|
"timeout": 60000
|
||||||
},
|
},
|
||||||
"context-docs": {
|
"context-docs": {
|
||||||
"type": "local",
|
"type": "remote",
|
||||||
"command": ["${bin}", "docs"],
|
"url": "${url}",
|
||||||
"enabled": true,
|
"enabled": true
|
||||||
"timeout": 120000
|
|
||||||
},
|
},
|
||||||
"context-repomix": {
|
"context-repomix": {
|
||||||
"type": "local",
|
"type": "local",
|
||||||
@@ -338,8 +378,9 @@ JSON
|
|||||||
}
|
}
|
||||||
|
|
||||||
print_claude() {
|
print_claude() {
|
||||||
local bin
|
local bin url
|
||||||
bin="$(json_escape "$(snippet_command "${1:-}")")"
|
bin="$(json_escape "$(snippet_command "${1:-}")")"
|
||||||
|
url="$(json_escape "${DOCS_HTTP_URL}")"
|
||||||
cat <<JSON
|
cat <<JSON
|
||||||
{
|
{
|
||||||
"mcpServers": {
|
"mcpServers": {
|
||||||
@@ -348,8 +389,8 @@ print_claude() {
|
|||||||
"args": ["web-search"]
|
"args": ["web-search"]
|
||||||
},
|
},
|
||||||
"context-docs": {
|
"context-docs": {
|
||||||
"command": "${bin}",
|
"type": "http",
|
||||||
"args": ["docs"]
|
"url": "${url}"
|
||||||
},
|
},
|
||||||
"context-repomix": {
|
"context-repomix": {
|
||||||
"command": "${bin}",
|
"command": "${bin}",
|
||||||
|
|||||||
29
compose.yml
29
compose.yml
@@ -36,12 +36,35 @@ services:
|
|||||||
build:
|
build:
|
||||||
context: ./docker/docs
|
context: ./docker/docs
|
||||||
image: context-kit/docs-mcp:latest
|
image: context-kit/docs-mcp:latest
|
||||||
profiles: ["mcp"]
|
# Long-lived shared docs MCP. One container = one Chroma writer; clients
|
||||||
stdin_open: true
|
# connect over Streamable HTTP (mcp-proxy bridges llms-txt-mcp's stdio).
|
||||||
tty: false
|
container_name: context-kit-docs-mcp
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:${CONTEXT_KIT_DOCS_PORT:-8776}:8000"
|
||||||
|
# Run as the host user so bind-mounted /data isn't owned by root.
|
||||||
|
user: "${CONTEXT_KIT_DOCS_UID:-1000}:${CONTEXT_KIT_DOCS_GID:-1000}"
|
||||||
|
environment:
|
||||||
|
HOME: /tmp
|
||||||
|
USER: context-kit
|
||||||
|
LOGNAME: context-kit
|
||||||
|
TORCHINDUCTOR_CACHE_DIR: /tmp/torchinductor
|
||||||
|
DOCS_MCP_TTL: "${CONTEXT_KIT_DOCS_TTL:-24h}"
|
||||||
|
DOCS_MCP_MAX_GET_BYTES: "${CONTEXT_KIT_DOCS_MAX_GET_BYTES:-75000}"
|
||||||
|
DOCS_MCP_EMBED_MODEL: "${CONTEXT_KIT_DOCS_EMBED_MODEL:-BAAI/bge-small-en-v1.5}"
|
||||||
|
# Preindex on startup is off by default; use the docs_refresh tool to
|
||||||
|
# refresh on demand. Set CONTEXT_KIT_DOCS_PREINDEX=1 to restore eager.
|
||||||
|
DOCS_MCP_PREINDEX: "${CONTEXT_KIT_DOCS_PREINDEX:-0}"
|
||||||
volumes:
|
volumes:
|
||||||
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs:/data
|
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs:/data
|
||||||
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/models:/models
|
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/models:/models
|
||||||
|
- ${CONTEXT_KIT_DATA_DIR:-${HOME}/.local/share/context-kit}/docs-sources.txt:/etc/context-kit/docs-sources.txt:ro
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "python -c \"import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://127.0.0.1:8000/status', timeout=2).status < 500 else 1)\""]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
labels:
|
labels:
|
||||||
dev.context-kit: "true"
|
dev.context-kit: "true"
|
||||||
|
|
||||||
|
|||||||
@@ -1,2 +1,3 @@
|
|||||||
*
|
*
|
||||||
!Dockerfile
|
!Dockerfile
|
||||||
|
!entrypoint.sh
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
FROM python:3.12-slim
|
FROM python:3.12-slim
|
||||||
|
|
||||||
ARG LLMS_TXT_MCP_VERSION=0.2.0
|
ARG LLMS_TXT_MCP_VERSION=0.2.0
|
||||||
|
ARG MCP_PROXY_VERSION=0.12.0
|
||||||
|
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends \
|
&& apt-get install -y --no-install-recommends \
|
||||||
@@ -12,16 +13,27 @@ RUN pip install --no-cache-dir \
|
|||||||
--index-url https://download.pytorch.org/whl/cpu \
|
--index-url https://download.pytorch.org/whl/cpu \
|
||||||
torch
|
torch
|
||||||
|
|
||||||
|
# llms-txt-mcp does the indexing/search; mcp-proxy fronts its stdio transport
|
||||||
|
# as Streamable HTTP so multiple MCP clients can share one long-lived process
|
||||||
|
# (and therefore one Chroma DB writer).
|
||||||
RUN if [ -n "${LLMS_TXT_MCP_VERSION}" ]; then \
|
RUN if [ -n "${LLMS_TXT_MCP_VERSION}" ]; then \
|
||||||
pip install --no-cache-dir "llms-txt-mcp==${LLMS_TXT_MCP_VERSION}"; \
|
pip install --no-cache-dir "llms-txt-mcp==${LLMS_TXT_MCP_VERSION}"; \
|
||||||
else \
|
else \
|
||||||
pip install --no-cache-dir llms-txt-mcp; \
|
pip install --no-cache-dir llms-txt-mcp; \
|
||||||
fi
|
fi \
|
||||||
|
&& pip install --no-cache-dir "mcp-proxy==${MCP_PROXY_VERSION}"
|
||||||
|
|
||||||
RUN mkdir -p /data /models
|
COPY entrypoint.sh /usr/local/bin/docs-mcp-entrypoint
|
||||||
|
RUN chmod +x /usr/local/bin/docs-mcp-entrypoint
|
||||||
|
|
||||||
|
RUN mkdir -p /data /models /etc/context-kit
|
||||||
ENV HF_HOME=/models \
|
ENV HF_HOME=/models \
|
||||||
SENTENCE_TRANSFORMERS_HOME=/models
|
SENTENCE_TRANSFORMERS_HOME=/models \
|
||||||
|
DOCS_MCP_HTTP_HOST=0.0.0.0 \
|
||||||
|
DOCS_MCP_HTTP_PORT=8000 \
|
||||||
|
DOCS_MCP_SOURCES_FILE=/etc/context-kit/docs-sources.txt
|
||||||
|
|
||||||
VOLUME ["/data", "/models"]
|
VOLUME ["/data", "/models"]
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
ENTRYPOINT ["llms-txt-mcp"]
|
ENTRYPOINT ["/usr/local/bin/docs-mcp-entrypoint"]
|
||||||
|
|||||||
53
docker/docs/entrypoint.sh
Normal file
53
docker/docs/entrypoint.sh
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
#!/bin/sh
# context-kit docs-mcp entrypoint.
#
# Bridges llms-txt-mcp (stdio-only) to Streamable HTTP via mcp-proxy so that
# multiple clients share a single long-lived indexer instead of each spawning
# their own container (and racing on the same Chroma store).
#
# Sources are read from $DOCS_MCP_SOURCES_FILE as whitespace-separated URLs,
# normally one per line. Blank lines and lines whose first non-blank character
# is `#` are ignored. Everything else is configured via env vars with sensible
# defaults so this image works standalone too.
#
# Exits 64 when the sources file is unreadable or yields no sources.

set -eu

sources_file="${DOCS_MCP_SOURCES_FILE:-/etc/context-kit/docs-sources.txt}"

if [ ! -r "$sources_file" ]; then
  echo "docs-mcp: sources file not readable: $sources_file" >&2
  echo "docs-mcp: set DOCS_MCP_SOURCES_FILE or mount one at that path." >&2
  exit 64
fi

# Strip comments and blank lines, then collapse whitespace into a flat list.
# `|| true` keeps `set -e` from aborting when grep matches nothing; the empty
# result is reported by the check below instead.
sources=$(grep -vE '^[[:space:]]*(#|$)' "$sources_file" | tr -s '[:space:]' '\n' | grep -v '^$' || true)

if [ -z "$sources" ]; then
  echo "docs-mcp: no sources found in $sources_file after stripping comments/blanks" >&2
  exit 64
fi

# By default llms-txt-mcp 0.2.0 re-embeds every source on launch (the actual
# default is a background preindex, --no-preindex only disables the foreground
# variant). On a long-lived container that just wastes ~5 min of CPU per
# restart, so we disable BOTH and let the caller use `docs_refresh` on demand.
# Set DOCS_MCP_PREINDEX=1 to restore the eager behavior.
preindex_flag="--no-preindex --no-background-preindex"
if [ "${DOCS_MCP_PREINDEX:-0}" = "1" ]; then
  preindex_flag=""
fi

# $sources is expanded unquoted below so it splits into one argument per URL.
# Disable pathname expansion first: URLs routinely contain `?` or `*`, which
# the shell would otherwise try to match against files in the working directory.
set -f

# NOTE(review): the default allow-origin of "*" presumably lets any web page
# open in a local browser call this service -- confirm that is intended, or
# set DOCS_MCP_ALLOW_ORIGIN to a specific origin.

# shellcheck disable=SC2086 # intentional word-splitting on $sources / $preindex_flag
exec mcp-proxy \
  --host "${DOCS_MCP_HTTP_HOST:-0.0.0.0}" \
  --port "${DOCS_MCP_HTTP_PORT:-8000}" \
  --pass-environment \
  --allow-origin "${DOCS_MCP_ALLOW_ORIGIN:-*}" \
  -- \
  llms-txt-mcp \
  --store-path /data \
  --ttl "${DOCS_MCP_TTL:-24h}" \
  --max-get-bytes "${DOCS_MCP_MAX_GET_BYTES:-75000}" \
  --embed-model "${DOCS_MCP_EMBED_MODEL:-BAAI/bge-small-en-v1.5}" \
  $preindex_flag \
  $sources
|
||||||
@@ -14,29 +14,34 @@ shell code.
|
|||||||
| `CONTEXT_KIT_DATA_DIR` | `$HOME/.local/share/context-kit` | Persistent docs indexes and model cache |
|
| `CONTEXT_KIT_DATA_DIR` | `$HOME/.local/share/context-kit` | Persistent docs indexes and model cache |
|
||||||
| `CONTEXT_KIT_COMPOSE_PROJECT` | `context-kit` | Docker Compose project and network prefix |
|
| `CONTEXT_KIT_COMPOSE_PROJECT` | `context-kit` | Docker Compose project and network prefix |
|
||||||
| `CONTEXT_KIT_SEARXNG_PORT` | `8099` | Localhost SearXNG port |
|
| `CONTEXT_KIT_SEARXNG_PORT` | `8099` | Localhost SearXNG port |
|
||||||
| `CONTEXT_KIT_DOCS_TTL` | `7d` | Docs re-fetch cadence |
|
| `CONTEXT_KIT_DOCS_PORT` | `8776` | Localhost port for the long-lived docs-mcp HTTP service |
|
||||||
|
| `CONTEXT_KIT_DOCS_HTTP_URL` | `http://127.0.0.1:${CONTEXT_KIT_DOCS_PORT}/mcp` | URL emitted into install snippets and used by the stdio bridge |
|
||||||
|
| `CONTEXT_KIT_DOCS_TTL` | `24h` | Docs re-fetch cadence |
|
||||||
| `CONTEXT_KIT_DOCS_SOURCES` | `config/sources.default.txt` | Space-separated source profile files |
|
| `CONTEXT_KIT_DOCS_SOURCES` | `config/sources.default.txt` | Space-separated source profile files |
|
||||||
| `CONTEXT_KIT_DOCS_MAX_GET_BYTES` | `75000` | Max bytes returned by docs retrieval |
|
| `CONTEXT_KIT_DOCS_MAX_GET_BYTES` | `75000` | Max bytes returned by docs retrieval |
|
||||||
| `CONTEXT_KIT_DOCS_EMBED_MODEL` | `BAAI/bge-small-en-v1.5` | SentenceTransformers embedding model |
|
| `CONTEXT_KIT_DOCS_EMBED_MODEL` | `BAAI/bge-small-en-v1.5` | SentenceTransformers embedding model |
|
||||||
|
| `CONTEXT_KIT_DOCS_PREINDEX` | `0` | Set to `1` to re-embed every source on container start |
|
||||||
|
|
||||||
## TTL Guidance
|
## TTL Guidance
|
||||||
|
|
||||||
`7d` is the default because most reference docs do not need daily re-embedding.
|
`24h` is the default. Most reference docs do not need re-embedding more often,
|
||||||
|
and the shared service does not re-fetch sources until the TTL elapses.
|
||||||
|
|
||||||
Use shorter TTLs for fast-moving APIs:
|
Use shorter TTLs for fast-moving APIs:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
CONTEXT_KIT_DOCS_TTL=72h bin/context-kit docs
|
CONTEXT_KIT_DOCS_TTL=6h bin/context-kit restart
|
||||||
```
|
```
|
||||||
|
|
||||||
Use longer TTLs for stable specs:
|
Use longer TTLs for stable specs:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
CONTEXT_KIT_DOCS_TTL=30d bin/context-kit docs
|
CONTEXT_KIT_DOCS_TTL=30d bin/context-kit restart
|
||||||
```
|
```
|
||||||
|
|
||||||
When freshness matters for one task, prefer a manual refresh through the docs
|
The docs-mcp container reads `CONTEXT_KIT_DOCS_TTL` at startup, so changes
|
||||||
MCP tool instead of lowering the global TTL for every session.
|
require `bin/context-kit restart`. When freshness matters for one task, prefer
|
||||||
|
calling the `docs_refresh` MCP tool instead of lowering the global TTL.
|
||||||
|
|
||||||
## Source Profiles
|
## Source Profiles
|
||||||
|
|
||||||
|
|||||||
@@ -5,8 +5,8 @@
|
|||||||
"args": ["web-search"]
|
"args": ["web-search"]
|
||||||
},
|
},
|
||||||
"context-docs": {
|
"context-docs": {
|
||||||
"command": "context-kit",
|
"type": "http",
|
||||||
"args": ["docs"]
|
"url": "http://127.0.0.1:8776/mcp"
|
||||||
},
|
},
|
||||||
"context-repomix": {
|
"context-repomix": {
|
||||||
"command": "context-kit",
|
"command": "context-kit",
|
||||||
|
|||||||
@@ -8,10 +8,9 @@
|
|||||||
"timeout": 60000
|
"timeout": 60000
|
||||||
},
|
},
|
||||||
"context-docs": {
|
"context-docs": {
|
||||||
"type": "local",
|
"type": "remote",
|
||||||
"command": ["context-kit", "docs"],
|
"url": "http://127.0.0.1:8776/mcp",
|
||||||
"enabled": true,
|
"enabled": true
|
||||||
"timeout": 120000
|
|
||||||
},
|
},
|
||||||
"context-repomix": {
|
"context-repomix": {
|
||||||
"type": "local",
|
"type": "local",
|
||||||
|
|||||||
Reference in New Issue
Block a user