# DollhouseMCP Website - robots.txt
#
# This file tells search engine crawlers which parts of the site they can access
# We want search engines to index our site for discoverability

# Allow all legitimate search engines (but keep /docs/ out of the index)
User-agent: *
Allow: /
Disallow: /docs/
# Crawl-delay to be respectful of resources (in seconds)
# Most modern bots ignore this, but it's good practice
# (kept inside this group: group-aware parsers apply it per user-agent group)
Crawl-delay: 1

# Block bad bots and scrapers
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: DotBot
User-agent: MJ12bot
User-agent: PetalBot
Disallow: /

# Sitemap
Sitemap: https://dollhousemcp.com/sitemap.xml

# LLM and agent discovery (informational — not standard robots.txt directives,
# but useful as human-readable pointers; crawlers that support these standards
# fetch these paths directly from the root)
# llms.txt: https://dollhousemcp.com/llms.txt
# agent-manifest: https://dollhousemcp.com/.well-known/agent-manifest.txt

# Security.txt location for security researchers (non-standard directive — informational only)
# See https://securitytxt.org/
# Security.txt: https://dollhousemcp.com/.well-known/security.txt

# Prefer-https: true (non-standard, ignored by compliant parsers — omitted for RFC 9309 cleanliness)