# robots.txt — ResearchHub Foundation
# This file governs crawler access to https://www.researchhub.foundation/
# For AI- and LLM-specific usage guidelines, see:
# https://www.researchhub.foundation/llm.txt

User-agent: *
Allow: /
# Disallow crawling of non-public or administrative paths
Disallow: /admin/
Disallow: /private/
Disallow: /internal/
Disallow: /drafts/
Disallow: /staging/
# Optional: prevent crawling of temporary or utility paths
Disallow: /tmp/
Disallow: /test/

# Explicit AI / LLM agent guidance (best-effort, non-binding)
# These agents are allowed to crawl public content, subject to llm.txt terms

# OpenAI crawlers
User-agent: GPTBot
Allow: /

User-agent: OAI-SearchBot
Allow: /

# Google's AI-training opt-in/opt-out token
User-agent: Google-Extended
Allow: /

# Anthropic crawlers
User-agent: anthropic-ai
Allow: /

User-agent: ClaudeBot
Allow: /

User-agent: PerplexityBot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: Applebot
Allow: /

# Common Crawl's crawler token
User-agent: CCBot
Allow: /

# Sitemap (if/when available)
Sitemap: https://www.researchhub.foundation/sitemap.xml