# ============================================================
# robots.txt - Optimized for WordPress
# Last updated: 2025
#
# How this file is evaluated (Robots Exclusion Protocol, RFC 9309):
#   * The format is line-oriented: exactly one directive per line.
#     Everything after a "#" on a line is a comment.
#   * A crawler obeys ONLY the group whose User-agent token matches
#     it most specifically. The "*" group is a fallback; its rules
#     are NOT inherited by the named groups further down.
#   * Inside a group, the matching rule with the longest path wins.
#   * No blank lines are used inside a group, because some older
#     parsers treat a blank line as the end of the group.
# ============================================================

# ============================================================
# GLOBAL BASE RULES
# Applies to every crawler that has no dedicated group below.
# ============================================================
User-agent: *
#
# WordPress system files & admin
# admin-ajax.php stays reachable because front-end features call it.
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-includes/
Disallow: /wp-login.php
Disallow: /wp-register.php
Disallow: /xmlrpc.php
Disallow: /readme.html
Disallow: /license.txt
Disallow: /wp-config.php
Disallow: /wp-cron.php
Disallow: /wp-mail.php
Disallow: /wp-trackback.php
#
# WordPress upload exploits
Disallow: /wp-content/uploads/wpo-plugins-tables-list.json
#
# Author archives (user enumeration prevention)
Disallow: /author/
Disallow: /?author=
Disallow: /*?author=
#
# Search & pagination
Disallow: /?s=
Disallow: /*?s=
Disallow: /search/
Disallow: /page/
Disallow: /*?paged=
Disallow: /*?page=
#
# Noisy query parameters
Disallow: /*?p=
Disallow: /*&p=
Disallow: /*?preview=
Disallow: /*?attachment_id=
Disallow: /*?filtering=
Disallow: /*?replytocom=
#
# Nested disclaimer pages (a path rule, not a query parameter)
Disallow: /*/disclaimer/
#
# Feeds (optional - remove if you want feeds indexed)
Disallow: /feed/
Disallow: /*/feed/
Disallow: /comments/feed/
#
# Core CSS/JS needed to render pages.
# These two rules are longer than "Disallow: /wp-includes/", so they
# win under longest-match precedence. They are deliberately not
# "$"-anchored so that versioned URLs such as style.css?ver=6.5 match.
Allow: /wp-includes/*.css
Allow: /wp-includes/*.js
#
# Allow static assets (CSS, JS, images, fonts)
# NOTE: every pattern below is shorter than /wp-admin/ and
# /wp-includes/, so none of them overrides those two Disallow rules;
# the "$" anchor also means URLs carrying a query string do not match.
Allow: /*.css$
Allow: /*.js$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.png$
Allow: /*.gif$
Allow: /*.webp$
Allow: /*.svg$
Allow: /*.ico$
Allow: /*.bmp$
Allow: /*.woff$
Allow: /*.woff2$
#
# Allow ads.txt & app-ads.txt
Allow: /ads.txt
Allow: /app-ads.txt

# ============================================================
# MAJOR SEARCH ENGINES - Full Access
# NOTE(review): because each crawler below has its own group, it
# ignores every Disallow in the "*" group above (including
# /wp-admin/ and the query-parameter rules). If these crawlers
# should honor the base rules, delete their groups here.
# ============================================================
User-agent: Googlebot
Allow: /

User-agent: Googlebot-Image
Allow: /

User-agent: bingbot
Allow: /

User-agent: Slurp
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Baiduspider
Allow: /

User-agent: Yandex
Allow: /

User-agent: Sogou
Allow: /

User-agent: Exabot
Allow: /

# ============================================================
# AI BOTS - Full Access (Explicitly Allowed)
# ============================================================

# OpenAI / ChatGPT
User-agent: GPTBot
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: OAI-SearchBot
Allow: /

# Perplexity
User-agent: PerplexityBot
Allow: /

# Anthropic / Claude
User-agent: ClaudeBot
Allow: /

User-agent: Claude-Web
Allow: /

User-agent: anthropic-ai
Allow: /

# Google AI
User-agent: Google-Extended
Allow: /

# Meta AI
User-agent: Meta-ExternalAgent
Allow: /

User-agent: FacebookBot
Allow: /

# Apple
User-agent: Applebot
Allow: /

User-agent: Applebot-Extended
Allow: /

# Amazon / Alexa
User-agent: Amazonbot
Allow: /

# Microsoft / Copilot
# Covered by the "bingbot" group in the search-engine section;
# user-agent tokens are matched case-insensitively, so a second
# "Bingbot" group would only be a duplicate.

# You.com
User-agent: YouBot
Allow: /

# Phind
User-agent: PhindBot
Allow: /

# AI2 / Allen Institute
User-agent: AI2Bot
Allow: /

# Cohere
User-agent: cohere-ai
Allow: /

# Diffbot
User-agent: Diffbot
Allow: /

# Common Crawl (used by many AI training datasets)
User-agent: CCBot
Allow: /

# Bytedance
User-agent: Bytespider
Allow: /

# ============================================================
# SCRAPERS, SEO CRAWLERS & GENERIC HTTP CLIENTS - Full Access
# NOTE(review): this section was labelled "protection", yet every
# rule in it is "Allow: /", which grants these agents MORE access
# than the "*" group. The rules are kept as they were; change them
# to "Disallow: /" if blocking was the intent. Generic HTTP
# libraries typically do not fetch robots.txt at all, so real
# blocking of those clients has to happen at the server or firewall.
# ============================================================
User-agent: python-requests
Allow: /

User-agent: curl
Allow: /

User-agent: wget
Allow: /

User-agent: libwww-perl
Allow: /

User-agent: Go-http-client
Allow: /

User-agent: Apache-HttpClient
Allow: /

User-agent: Scrapy
Allow: /

User-agent: Java
Allow: /

User-agent: MJ12bot
Allow: /

User-agent: DotBot
Allow: /

User-agent: AhrefsBot
Allow: /

User-agent: SemrushBot
Allow: /

User-agent: MajesticSEO
Allow: /

User-agent: BLEXBot
Allow: /

# ============================================================
# SITEMAP
# Independent of the groups above; applies to all crawlers.
# ============================================================
Sitemap: https://workdigital.de/sitemap_index.xml