From 4b4e21bcfef050f01eec319559346c5a877807b4 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 25 Apr 2026 16:37:18 +0000 Subject: [PATCH] Tighten robots.txt to limit bot crawl surface (#715) Bots were hammering the API and exhibit-detail pages with random ids, generating both load and 500 noise. Update robots.txt to: - Allow only the public CMS surface (home, collection, community, documentation, memories, docs, see_all). - Disallow /api/, /admin/, /users/, exhibit/marker/object/sound detail and edit paths, generator, modal helpers. - For dev/staging hosts (dev.* or *staging*), emit Disallow: / unconditionally so search engines stop indexing dev.jandig.app. Closes #715 https://claude.ai/code/session_01XC1THLWgnGXGf5wgRhdyvB --- src/core/views/static_views.py | 35 ++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/core/views/static_views.py b/src/core/views/static_views.py index 22dff02a..d4e11710 100644 --- a/src/core/views/static_views.py +++ b/src/core/views/static_views.py @@ -41,10 +41,41 @@ def marker_generator(request): return render(request, "core/generator.html", {}) -def robots_txt(_): +def robots_txt(request): + # Hosts that start with "dev." or contain "staging" disallow every path, + # so dev.jandig.app stops showing up in search results. + host = request.get_host().lower() + if host.startswith("dev.") or "staging" in host: + lines = ["User-Agent: *", "Disallow: /"] + return HttpResponse("\n".join(lines), content_type="text/plain") + + # Production (any other host): explicitly allow the public CMS/blog surface; + # block the API, exhibit detail pages (bots feed random ids and cause 500s), and + # auth-gated CMS edit/upload paths. NOTE(review): paths matching no rule below stay crawlable by default -- confirm intended. 
lines = [ "User-Agent: *", - "Disallow: ", + "Allow: /$", + "Allow: /collection/", + "Allow: /community/", + "Allow: /documentation/", + "Allow: /memories/", + "Allow: /docs/", + "Allow: /see_all/", + "Disallow: /api/", + "Disallow: /admin/", + "Disallow: /users/", + "Disallow: /exhibit/", + "Disallow: /exhibits/", + "Disallow: /artwork/", + "Disallow: /artworks/", + "Disallow: /marker/", + "Disallow: /markers/", + "Disallow: /objects/", + "Disallow: /sounds/", + "Disallow: /generator/", + "Disallow: /content/delete/", + "Disallow: /elements/", + "Disallow: /exhibit_select/", ] return HttpResponse("\n".join(lines), content_type="text/plain")