Run docs-mcp as a long-lived shared HTTP service (#1)
Co-authored-by: Ajay Krishnan <ajay@krishnan.ca> Co-committed-by: Ajay Krishnan <ajay@krishnan.ca>
This commit was merged in pull request #1.
This commit is contained in:
@@ -1,2 +1,3 @@
|
||||
*
|
||||
!Dockerfile
|
||||
!entrypoint.sh
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
ARG LLMS_TXT_MCP_VERSION=0.2.0
|
||||
ARG MCP_PROXY_VERSION=0.12.0
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
@@ -12,16 +13,27 @@ RUN pip install --no-cache-dir \
|
||||
--index-url https://download.pytorch.org/whl/cpu \
|
||||
torch
|
||||
|
||||
# llms-txt-mcp does the indexing/search; mcp-proxy fronts its stdio transport
|
||||
# as Streamable HTTP so multiple MCP clients can share one long-lived process
|
||||
# (and therefore one Chroma DB writer).
|
||||
RUN if [ -n "${LLMS_TXT_MCP_VERSION}" ]; then \
|
||||
pip install --no-cache-dir "llms-txt-mcp==${LLMS_TXT_MCP_VERSION}"; \
|
||||
else \
|
||||
pip install --no-cache-dir llms-txt-mcp; \
|
||||
fi
|
||||
fi \
|
||||
&& pip install --no-cache-dir "mcp-proxy==${MCP_PROXY_VERSION}"
|
||||
|
||||
RUN mkdir -p /data /models
|
||||
COPY entrypoint.sh /usr/local/bin/docs-mcp-entrypoint
|
||||
RUN chmod +x /usr/local/bin/docs-mcp-entrypoint
|
||||
|
||||
RUN mkdir -p /data /models /etc/context-kit
|
||||
ENV HF_HOME=/models \
|
||||
SENTENCE_TRANSFORMERS_HOME=/models
|
||||
SENTENCE_TRANSFORMERS_HOME=/models \
|
||||
DOCS_MCP_HTTP_HOST=0.0.0.0 \
|
||||
DOCS_MCP_HTTP_PORT=8000 \
|
||||
DOCS_MCP_SOURCES_FILE=/etc/context-kit/docs-sources.txt
|
||||
|
||||
VOLUME ["/data", "/models"]
|
||||
EXPOSE 8000
|
||||
|
||||
ENTRYPOINT ["llms-txt-mcp"]
|
||||
ENTRYPOINT ["/usr/local/bin/docs-mcp-entrypoint"]
|
||||
|
||||
53
docker/docs/entrypoint.sh
Normal file
53
docker/docs/entrypoint.sh
Normal file
@@ -0,0 +1,53 @@
|
||||
#!/bin/sh
|
||||
# context-kit docs-mcp entrypoint.
|
||||
#
|
||||
# Bridges llms-txt-mcp (stdio-only) to Streamable HTTP via mcp-proxy so that
|
||||
# multiple clients share a single long-lived indexer instead of each spawning
|
||||
# their own container (and racing on the same Chroma store).
|
||||
#
|
||||
# Sources are read from $DOCS_MCP_SOURCES_FILE (one URL per line; `#` comments
|
||||
# and blank lines are allowed). Everything else is configured via env vars
|
||||
# with sensible defaults so this image works standalone too.
|
||||
|
||||
# Abort on any unhandled command failure (-e) or use of an unset variable (-u).
set -eu

# Path of the file listing the documentation sources; overridable through the
# environment, otherwise the image's default location is used.
sources_file="${DOCS_MCP_SOURCES_FILE:-/etc/context-kit/docs-sources.txt}"

# A directory passes the `-r` test below, and `docker run -v host:container`
# creates a directory when the host path does not exist. Catch that case here
# with a precise message instead of failing later with "no sources found".
if [ -d "$sources_file" ]; then
  printf '%s\n' "docs-mcp: sources file is a directory, not a file: $sources_file" >&2
  printf '%s\n' "docs-mcp: check that the host file you bind-mounted actually exists." >&2
  exit 64
fi

# Exit status 64 (EX_USAGE in sysexits.h) marks a configuration error.
if [ ! -r "$sources_file" ]; then
  printf '%s\n' "docs-mcp: sources file not readable: $sources_file" >&2
  printf '%s\n' "docs-mcp: set DOCS_MCP_SOURCES_FILE or mount one at that path." >&2
  exit 64
fi
|
||||
|
||||
# Print the sources listed in file $1, one per line.
#
# Whole-line comments, trailing comments (a `#` preceded by whitespace) and
# blank lines are dropped; the remaining text is split on any whitespace, which
# also removes the carriage returns of CRLF files. A `#` that is not preceded
# by whitespace (e.g. a URL fragment) is left alone.
#
# Always returns 0: the final grep exits 1 when nothing is left, and under
# `set -e` that would otherwise abort the caller before it can report the
# problem itself.
read_sources() {
  sed -e 's/^#.*//' -e 's/[[:space:]]#.*//' "$1" \
    | tr -s '[:space:]' '\n' \
    | grep -v '^$' \
    || true
}

sources=$(read_sources "$sources_file")
|
||||
|
||||
# Refuse to start without at least one source: report the problem on stderr
# and exit with status 64.
[ -n "$sources" ] || {
  echo "docs-mcp: no sources found in $sources_file after stripping comments/blanks" >&2
  exit 64
}
|
||||
|
||||
# llms-txt-mcp 0.2.0 re-embeds every source on launch. Its real default is a
# *background* preindex; --no-preindex only turns off the foreground variant.
# On a long-lived container that just wastes ~5 min of CPU per restart, so
# both are disabled here and callers run `docs_refresh` on demand instead.
# Set DOCS_MCP_PREINDEX=1 (or true/yes/on, any letter case) to restore the
# eager behavior.

# Print the llms-txt-mcp flags matching the DOCS_MCP_PREINDEX value in $1:
# nothing when eager indexing is requested, both "no preindex" flags otherwise.
preindex_flags_for() {
  case "$1" in
    1 | [Tt][Rr][Uu][Ee] | [Yy][Ee][Ss] | [Oo][Nn])
      ;;
    *)
      printf '%s' '--no-preindex --no-background-preindex'
      ;;
  esac
}

preindex_flag=$(preindex_flags_for "${DOCS_MCP_PREINDEX:-0}")
|
||||
|
||||
# Replace this shell with mcp-proxy, which listens on the configured host/port
# and runs llms-txt-mcp as its child; everything after `--` is the child's
# command line. Each DOCS_MCP_* setting falls back to the default shown.
#
# $preindex_flag and $sources are expanded unquoted on purpose so that every
# flag and every URL becomes its own argument. Word-splitting is wanted, but
# pathname expansion is not: a URL containing `?`, `*` or `[` could otherwise
# be replaced by matching file names. Globbing is therefore switched off first.
set -f

# shellcheck disable=SC2086 # intentional word-splitting on $sources / $preindex_flag
exec mcp-proxy \
  --host "${DOCS_MCP_HTTP_HOST:-0.0.0.0}" \
  --port "${DOCS_MCP_HTTP_PORT:-8000}" \
  --pass-environment \
  --allow-origin "${DOCS_MCP_ALLOW_ORIGIN:-*}" \
  -- \
  llms-txt-mcp \
  --store-path /data \
  --ttl "${DOCS_MCP_TTL:-24h}" \
  --max-get-bytes "${DOCS_MCP_MAX_GET_BYTES:-75000}" \
  --embed-model "${DOCS_MCP_EMBED_MODEL:-BAAI/bge-small-en-v1.5}" \
  $preindex_flag \
  $sources
|
||||
Reference in New Issue
Block a user