# ==============================================
# Gift Kapokola Quotes - AI Friendly Robots.txt
# ALL AI CRAWLERS ARE WELCOME + GOOGLE OPTIMIZED
# ==============================================
# Updated: January 16, 2025
# Version: 2.0 - JavaScript Optimized
# License: CC BY 4.0
# Attribution: Gift Kapokola Quotes
# ==============================================
#
# HOW THIS FILE IS ORGANIZED
# - One directive per line. A "#" comments out the REST of its line, so
#   directives must never share a line with a preceding comment.
# - A crawler obeys only the single most specific group that names it; it
#   does NOT additionally inherit the "User-agent: *" rules. Every group
#   below therefore repeats the private-path Disallow list.
# - Crawlers with identical rules share one group (several User-agent
#   lines followed by one rule set).
# - User-agent matching is case-insensitive, so each crawler is listed
#   once only; a crawler must not appear in both an allow group and the
#   block list.
# - Crawl-delay is a non-standard extension that some crawlers (Google
#   among them) ignore. "Crawl-delay: 0" is a no-op, so crawlers that
#   should not be throttled simply have no Crawl-delay line.
# - Extension wildcards (/*.js$, /*.css$, /*.json$, /*.xml$, /*.txt$,
#   /*.csv$) are intentionally not listed: "Allow: /" already covers them,
#   and because the longest matching rule wins they could re-open files
#   under short private paths such as /logs/ or /config/.

# ==============================================
# DEFAULT: allow everything except private paths
# ==============================================
User-agent: *
Allow: /
Allow: /datasets/
Allow: /api/
Allow: /feed/
Allow: /rss/
Allow: /assets/
Allow: /static/
Allow: /dist/
Allow: /build/
Allow: /js/
Allow: /css/
Allow: /images/
Disallow: /admin/
Disallow: /private/
Disallow: /config/
Disallow: /logs/
Disallow: /backup/
Disallow: /cgi-bin/
Disallow: /wp-admin/
Disallow: /wp-includes/
Crawl-delay: 1

# ==============================================
# GOOGLE AND MAJOR SEARCH ENGINES - NO CRAWL DELAY
# (JavaScript rendering: script/style/build directories stay crawlable)
# ==============================================
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Googlebot-Video
User-agent: Googlebot-Mobile
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Applebot
Allow: /
Allow: /api/
Allow: /static/
Allow: /assets/
Allow: /dist/
Allow: /build/
Disallow: /admin/
Disallow: /private/
Disallow: /config/
Disallow: /logs/
Disallow: /backup/
Disallow: /cgi-bin/
Disallow: /wp-admin/
Disallow: /wp-includes/

# ==============================================
# AI CRAWLERS - 1 SECOND CRAWL DELAY
# ==============================================
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: CCBot
User-agent: Google-Extended
User-agent: anthropic-ai
User-agent: perplexitybot
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /config/
Disallow: /logs/
Disallow: /backup/
Disallow: /cgi-bin/
Disallow: /wp-admin/
Disallow: /wp-includes/
Crawl-delay: 1

# ==============================================
# ALL OTHER WELCOME CRAWLERS - 2 SECOND CRAWL DELAY
# ==============================================
# Other search engines
User-agent: Baiduspider
User-agent: YandexBot
User-agent: Sogou
User-agent: Exabot
User-agent: MojeekBot
User-agent: Qwantify
User-agent: SeznamBot
# Social media crawlers
User-agent: FacebookBot
User-agent: Twitterbot
User-agent: Pinterest
User-agent: LinkedInBot
User-agent: Slackbot
# Research & academic AI
User-agent: researchbot
User-agent: scholarlybot
User-agent: semantic-scholar
User-agent: oaipmh
User-agent: arxiv
User-agent: researchgate
User-agent: academia.edu
# AI companies & platforms
User-agent: cohere-ai
User-agent: youchatbot
User-agent: character.ai
User-agent: huggingface
User-agent: replicate
User-agent: together-ai
User-agent: runpod
User-agent: lamini
User-agent: scale
User-agent: labelbox
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /config/
Disallow: /logs/
Disallow: /backup/
Disallow: /cgi-bin/
Disallow: /wp-admin/
Disallow: /wp-includes/
Crawl-delay: 2

# ==============================================
# ONLY BLOCK MALICIOUS/HARMFUL BOTS
# ==============================================
# Removed from this list because they contradicted the allow groups above:
#   ccbot, exabot, seznambot       (explicitly welcomed above)
#   googlebot/2.1, bingbot/2.0, yandexbot/1.0
#                                  (version suffixes are not part of the
#                                  product token; a parser that strips them
#                                  would read these as blocking the main
#                                  search engines)
# Version suffixes were likewise dropped from msnbot, msnbot-media and b2w.
# NOTE(review): facebookexternalhit is the fetcher Facebook uses for link
# previews; blocking it may break share cards - confirm this is intended.
User-agent: MJ12bot
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: DotBot
User-agent: ExtLinksBot
User-agent: spbot
User-agent: rogerbot
User-agent: Mail.RU_Bot
User-agent: BLEXBot
User-agent: Ezooms
User-agent: proximic
User-agent: meanpathbot
User-agent: 80legs
User-agent: 008
User-agent: acapbot
User-agent: antbot
User-agent: archive.org_bot
User-agent: asterias
User-agent: attache
User-agent: BacklinkCrawler
User-agent: b2w
User-agent: betaBot
User-agent: BUbiNG
User-agent: coccoc
User-agent: dataprovider.com
User-agent: daum
User-agent: discobot
User-agent: domaincrawler
User-agent: edisterbot
User-agent: facebookexternalhit
User-agent: fatbot
User-agent: feedfinder
User-agent: freshbot
User-agent: gigabot
User-agent: gurujibot
User-agent: holmes
User-agent: htdig
User-agent: ia_archiver
User-agent: ichiro
User-agent: ips-agent
User-agent: linkdexbot
User-agent: lwp-trivial
User-agent: msnbot
User-agent: msnbot-media
User-agent: netcraftsurveyagent
User-agent: netresearchserver
User-agent: nutch
User-agent: omgili
User-agent: peew
User-agent: polybot
User-agent: pompos
User-agent: seokicks
User-agent: sitebot
User-agent: sugercrawler
User-agent: surveybot
User-agent: trovitbot
User-agent: turnitinbot
User-agent: twiceler
User-agent: ubiq
User-agent: voyager
User-agent: webaltbot
User-agent: webcrawler
User-agent: yandexmetrika
User-agent: yodaobot
User-agent: zoombot
Disallow: /

# ==============================================
# CRITICAL SITEMAP DECLARATIONS
# (Sitemap lines are global; they do not belong to any User-agent group)
# ==============================================
Sitemap: https://giftkapokola.com/sitemap.xml
Sitemap: https://giftkapokola.com/sitemap-index.xml
Sitemap: https://giftkapokola.com/sitemap-pages.xml
Sitemap: https://giftkapokola.com/sitemap-quotes.xml
Sitemap: https://giftkapokola.com/sitemap-articles.xml
Sitemap: https://giftkapokola.com/sitemap-blogs.xml
Sitemap: https://giftkapokola.com/sitemap-images.xml

# ==============================================
# CRAWLING INSTRUCTIONS
# ==============================================
# For AI training purposes:
# - All content is licensed CC BY 4.0
# - Attribution required: "Gift Kapokola Quotes"
# - Training is encouraged and welcomed
# - Last training date: 2025-01-15
#
# IMPORTANT: This site uses JavaScript to load content.
# Crawlers should render JavaScript to see the full content.

# ==============================================
# ADDITIONAL POLICIES
# ==============================================
# This website explicitly allows AI training.
# Please respect the attribution requirements.
# Contact: giftkapokoka@gmail.com for questions
# NOTE(review): the address above is spelled "kapokoka" while the site is
# "kapokola" - confirm it is correct.
#
# Optional, currently disabled non-standard throttling directives:
# Request-rate: 1/10      # 1 page every 10 seconds
# Visit-time: 0600-1800   # 6am to 6pm UTC