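# Humans + agents welcome. Training crawlers + SEO scrapers blocked,
# except for the deliberate exceptions documented at their groups below
# (Bytespider, Google-Extended).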

# --- Allowed: search engines (send humans) ---
# One group per crawler token; `Allow: /` permits the whole site.
# These groups grant the same access as the catch-all `User-agent: *` group at
# the end of this file. They are kept explicit because a crawler follows only
# the most specific group that matches it, so these stay in effect even if the
# catch-all is tightened later.
User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: DuckDuckBot
Allow: /

# --- Allowed: Chinese search engines (target readers are in China, so allow explicitly) ---
# Bytespider feeds both Toutiao/Douyin search and Doubao; we accept that it
# also crawls for training as the price of a domestic distribution channel.
# PetalBot is Huawei's Petal Search (the upstream template blocked it as an
# SEO scraper, which is a false positive for a Chinese-language site).
# NOTE(review): the spaces in the `Sogou web spider` token look deliberate --
# presumably it mirrors the product name that crawler sends. Confirm against
# Sogou's documentation before "normalizing" it.
User-agent: Baiduspider
Allow: /

User-agent: Sogou web spider
Allow: /

User-agent: 360Spider
Allow: /

User-agent: Bytespider
Allow: /

User-agent: PetalBot
Allow: /

# --- Allowed: agent browsers and AI search crawlers (send AI users) ---
# Every group below grants full-site access to one token.
# The *-User tokens (ChatGPT-User, Perplexity-User, Claude-User) are the
# user-initiated fetchers.
# NOTE(review): OAI-SearchBot, PerplexityBot, Claude-SearchBot and Applebot
# appear to be index crawlers rather than user-initiated fetchers, going by
# their vendors' documentation -- confirm. The allow policy is the same either
# way; only the category label is affected.
User-agent: ChatGPT-User
Allow: /

User-agent: OAI-SearchBot
Allow: /

User-agent: PerplexityBot
Allow: /

User-agent: Perplexity-User
Allow: /

User-agent: Claude-User
Allow: /

User-agent: Claude-SearchBot
Allow: /

User-agent: Applebot
Allow: /

# --- Allowed: AI grounding / answer engines (drive citations) ---
# These tokens are allowed on purpose: we give up the training opt-out in
# exchange for wider coverage when AI answers cite sources.
# NOTE(review): Google documents Google-Extended as a control token for Gemini
# Apps / Vertex AI training and grounding; it is not a separate crawler and
# does not affect Google Search. AI Overviews are a Search feature and follow
# the Googlebot rules instead, so allowing Google-Extended buys Gemini
# grounding coverage, not AI Overviews exposure. Confirm against Google's
# crawler documentation.
User-agent: Google-Extended
Allow: /

User-agent: FirecrawlAgent
Allow: /

User-agent: Context7
Allow: /

User-agent: Crawl4AI
Allow: /

# --- Blocked: training crawlers ---
# `Disallow: /` asks the named crawler to stay off the entire site.
# robots.txt is advisory: it only stops crawlers that choose to honor it.
# GPTBot, ClaudeBot and Applebot-Extended are blocked here while their sibling
# tokens (ChatGPT-User / OAI-SearchBot, Claude-User / Claude-SearchBot,
# Applebot) are allowed in other groups of this file. That split is the
# policy stated in the file header: agents welcome, training crawlers blocked.
User-agent: GPTBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: peer39_crawler
Disallow: /

User-agent: YouBot
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: ICC-Crawler
Disallow: /

# --- Blocked: SEO scrapers / link spammers ---
# Each group denies the whole site to one SEO / backlink-analysis crawler token.
# NOTE(review): SeznamBot looks like the crawler of the Czech search engine
# Seznam.cz rather than an SEO tool -- confirm that blocking it is intended
# and not inherited from an upstream template.
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: MegaIndex
Disallow: /

User-agent: SeznamBot
Disallow: /

User-agent: DataForSeoBot
Disallow: /

# --- Default: humans + everything else allowed ---
# Catch-all group: applies to any crawler that has no group of its own in
# this file, and grants it full-site access.
User-agent: *
Allow: /

# Absolute URL of the XML sitemap. This line is independent of the
# user-agent groups above.
Sitemap: https://aieng-zh.cn/sitemap.xml
