Harden web search and docs defaults
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
*
|
||||
!Dockerfile
|
||||
!entrypoint.sh
|
||||
!constraints.txt
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
FROM python:3.12-slim
|
||||
FROM python:3.12-slim@sha256:6c4dd321d176d61ea848dc8c73a4f7dbae8f70e0ee48bb411ea2f045b599fa8e
|
||||
|
||||
ARG LLMS_TXT_MCP_VERSION=0.2.0
|
||||
ARG MCP_PROXY_VERSION=0.12.0
|
||||
ARG TORCH_VERSION=2.12.1+cpu
|
||||
|
||||
COPY constraints.txt /tmp/context-kit-docs-constraints.txt
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
@@ -11,17 +14,19 @@ RUN apt-get update \
|
||||
# Install CPU-only torch first so llms-txt-mcp does not pull large CUDA wheels.
|
||||
RUN pip install --no-cache-dir \
|
||||
--index-url https://download.pytorch.org/whl/cpu \
|
||||
torch
|
||||
-c /tmp/context-kit-docs-constraints.txt \
|
||||
"torch==${TORCH_VERSION}"
|
||||
|
||||
# llms-txt-mcp does the indexing/search; mcp-proxy fronts its stdio transport
|
||||
# as Streamable HTTP so multiple MCP clients can share one long-lived process
|
||||
# (and therefore one Chroma DB writer).
|
||||
RUN if [ -n "${LLMS_TXT_MCP_VERSION}" ]; then \
|
||||
pip install --no-cache-dir "llms-txt-mcp==${LLMS_TXT_MCP_VERSION}"; \
|
||||
pip install --no-cache-dir -c /tmp/context-kit-docs-constraints.txt "llms-txt-mcp==${LLMS_TXT_MCP_VERSION}"; \
|
||||
else \
|
||||
pip install --no-cache-dir llms-txt-mcp; \
|
||||
pip install --no-cache-dir -c /tmp/context-kit-docs-constraints.txt llms-txt-mcp; \
|
||||
fi \
|
||||
&& pip install --no-cache-dir "mcp-proxy==${MCP_PROXY_VERSION}"
|
||||
&& pip install --no-cache-dir -c /tmp/context-kit-docs-constraints.txt "mcp-proxy==${MCP_PROXY_VERSION}" \
|
||||
&& rm /tmp/context-kit-docs-constraints.txt
|
||||
|
||||
COPY entrypoint.sh /usr/local/bin/docs-mcp-entrypoint
|
||||
RUN chmod +x /usr/local/bin/docs-mcp-entrypoint
|
||||
|
||||
107
docker/docs/constraints.txt
Normal file
107
docker/docs/constraints.txt
Normal file
@@ -0,0 +1,107 @@
|
||||
aiohappyeyeballs==2.6.2
|
||||
aiohttp==3.14.1
|
||||
aiosignal==1.4.0
|
||||
annotated-doc==0.0.4
|
||||
annotated-types==0.7.0
|
||||
anyio==4.14.1
|
||||
attrs==26.1.0
|
||||
bcrypt==5.0.0
|
||||
build==1.5.0
|
||||
certifi==2026.6.17
|
||||
cffi==2.0.0
|
||||
charset-normalizer==3.4.7
|
||||
chromadb==1.5.9
|
||||
click==8.4.2
|
||||
cryptography==49.0.0
|
||||
durationpy==0.10
|
||||
filelock==3.29.0
|
||||
flatbuffers==25.12.19
|
||||
frozenlist==1.8.0
|
||||
fsspec==2026.4.0
|
||||
googleapis-common-protos==1.75.0
|
||||
grpcio==1.81.1
|
||||
h11==0.16.0
|
||||
hf-xet==1.5.1
|
||||
httpcore==1.0.9
|
||||
httptools==0.8.0
|
||||
httpx==0.28.1
|
||||
httpx-sse==0.4.3
|
||||
httpx_auth==0.23.1
|
||||
huggingface_hub==1.20.1
|
||||
idna==3.18
|
||||
importlib_resources==7.1.0
|
||||
Jinja2==3.1.6
|
||||
joblib==1.5.3
|
||||
jsonschema==4.26.0
|
||||
jsonschema-specifications==2025.9.1
|
||||
kubernetes==36.0.2
|
||||
llms-txt-mcp==0.2.0
|
||||
markdown-it-py==4.2.0
|
||||
MarkupSafe==3.0.3
|
||||
mcp==1.28.0
|
||||
mcp-proxy==0.12.0
|
||||
mdurl==0.1.2
|
||||
mmh3==5.2.1
|
||||
mpmath==1.3.0
|
||||
multidict==6.7.1
|
||||
narwhals==2.22.1
|
||||
networkx==3.6.1
|
||||
numpy==2.5.0
|
||||
oauthlib==3.3.1
|
||||
onnxruntime==1.27.0
|
||||
opentelemetry-api==1.43.0
|
||||
opentelemetry-exporter-otlp-proto-common==1.43.0
|
||||
opentelemetry-exporter-otlp-proto-grpc==1.43.0
|
||||
opentelemetry-proto==1.43.0
|
||||
opentelemetry-sdk==1.43.0
|
||||
opentelemetry-semantic-conventions==0.64b0
|
||||
orjson==3.11.9
|
||||
overrides==7.7.0
|
||||
packaging==26.2
|
||||
propcache==0.5.2
|
||||
protobuf==7.35.1
|
||||
pybase64==1.4.3
|
||||
pycparser==3.0
|
||||
pydantic==2.13.4
|
||||
pydantic-settings==2.14.2
|
||||
pydantic_core==2.46.4
|
||||
Pygments==2.20.0
|
||||
PyJWT==2.13.0
|
||||
PyPika==0.51.1
|
||||
pyproject_hooks==1.2.0
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.2.2
|
||||
python-multipart==0.0.32
|
||||
PyYAML==6.0.3
|
||||
referencing==0.37.0
|
||||
regex==2026.5.9
|
||||
requests==2.34.2
|
||||
requests-oauthlib==2.0.0
|
||||
rich==15.0.0
|
||||
rpds-py==2026.5.1
|
||||
safetensors==0.8.0
|
||||
scikit-learn==1.9.0
|
||||
scipy==1.18.0
|
||||
sentence-transformers==5.6.0
|
||||
setuptools==70.2.0
|
||||
shellingham==1.5.4
|
||||
six==1.17.0
|
||||
sse-starlette==3.4.5
|
||||
starlette==1.3.1
|
||||
sympy==1.14.0
|
||||
tenacity==9.1.4
|
||||
threadpoolctl==3.6.0
|
||||
tokenizers==0.22.2
|
||||
torch==2.12.1+cpu
|
||||
tqdm==4.68.3
|
||||
transformers==5.12.1
|
||||
typer==0.25.1
|
||||
typing-inspection==0.4.2
|
||||
typing_extensions==4.15.0
|
||||
urllib3==2.7.0
|
||||
uvicorn==0.49.0
|
||||
uvloop==0.22.1
|
||||
watchfiles==1.2.0
|
||||
websocket-client==1.9.0
|
||||
websockets==16.0
|
||||
yarl==1.24.2
|
||||
@@ -12,6 +12,8 @@
|
||||
set -eu
|
||||
|
||||
sources_file="${DOCS_MCP_SOURCES_FILE:-/etc/context-kit/docs-sources.txt}"
|
||||
local_sources_dir="${DOCS_MCP_LOCAL_SOURCES_DIR:-/etc/context-kit/local-sources}"
|
||||
local_sources_port="${DOCS_MCP_LOCAL_SOURCES_PORT:-8769}"
|
||||
|
||||
if [ ! -r "$sources_file" ]; then
|
||||
echo "docs-mcp: sources file not readable: $sources_file" >&2
|
||||
@@ -27,11 +29,41 @@ if [ -z "$sources" ]; then
|
||||
exit 64
|
||||
fi
|
||||
|
||||
if [ -d "$local_sources_dir" ]; then
|
||||
python -m http.server "$local_sources_port" \
|
||||
--bind 127.0.0.1 \
|
||||
--directory "$local_sources_dir" \
|
||||
>/tmp/context-kit-local-sources.log 2>&1 &
|
||||
local_sources_pid="$!"
|
||||
if ! python - "$local_sources_port" <<'PY'
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
port = sys.argv[1]
|
||||
last_error = None
|
||||
for _ in range(20):
|
||||
try:
|
||||
with urllib.request.urlopen(f"http://127.0.0.1:{port}/", timeout=0.5) as response:
|
||||
if response.status < 500:
|
||||
raise SystemExit(0)
|
||||
except Exception as error:
|
||||
last_error = error
|
||||
time.sleep(0.1)
|
||||
raise SystemExit(f"local source server did not become ready: {last_error}")
|
||||
PY
|
||||
then
|
||||
kill "$local_sources_pid" 2>/dev/null || true
|
||||
echo "docs-mcp: local source server failed on 127.0.0.1:$local_sources_port" >&2
|
||||
exit 65
|
||||
fi
|
||||
fi
|
||||
|
||||
# By default llms-txt-mcp 0.2.0 re-embeds every source on launch (the actual
|
||||
# default is a background preindex; --no-preindex only disables the foreground
|
||||
# variant). On a long-lived container that just wastes ~5 min of CPU per
|
||||
# restart, so we disable BOTH and let the caller use `docs_refresh` on demand.
|
||||
# Set DOCS_MCP_PREINDEX=1 to restore the eager behavior.
|
||||
# variant). On a long-lived container that wastes CPU per restart, so we disable
|
||||
# BOTH. Missing/stale sources still refresh on first docs_query/docs_refresh.
|
||||
# Set DOCS_MCP_PREINDEX=1 to restore eager startup indexing.
|
||||
preindex_flag="--no-preindex --no-background-preindex"
|
||||
if [ "${DOCS_MCP_PREINDEX:-0}" = "1" ]; then
|
||||
preindex_flag=""
|
||||
|
||||
Reference in New Issue
Block a user