# DollhouseMCP Website - robots.txt
#
# This file tells search engine crawlers which parts of the site they can access
# We want search engines to index our site for discoverability

# Allow all legitimate search engines (but keep /docs/ out of the index)
User-agent: *
Allow: /
Disallow: /docs/
# Crawl-delay to be respectful of resources (in seconds)
# Most modern bots ignore this, but it's good practice
# (kept inside this group: group-aware parsers apply it per user-agent group)
Crawl-delay: 1

# Block bad bots and scrapers
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: DotBot
User-agent: MJ12bot
User-agent: PetalBot
Disallow: /

# Sitemap
Sitemap: https://dollhousemcp.com/sitemap.xml

# LLM and agent discovery (informational — not standard robots.txt directives,
# but useful as human-readable pointers; crawlers that support these standards
# fetch these paths directly from the root)
# llms.txt: https://dollhousemcp.com/llms.txt
# agent-manifest: https://dollhousemcp.com/.well-known/agent-manifest.txt

# Security.txt location for security researchers (non-standard directive — informational only)
# See https://securitytxt.org/
# Security.txt: https://dollhousemcp.com/.well-known/security.txt

# Prefer-https: true (non-standard, ignored by compliant parsers — omitted for RFC 9309 cleanliness)