# Robots.txt for Password Generator Website
# Optimized configuration for search engine crawling and security
# Last updated: 2025-10-25
# Website: https://dgyy.github.io/Password-Generator/

# ===================================================================
# GLOBAL RULES - Apply to all crawlers
# ===================================================================
User-agent: *

# Allow access to main content areas
Allow: /
Allow: /categories/
Allow: /pages/

# Block sensitive and private areas
Disallow: /clear-history.html
Disallow: /src/
Disallow: /*.draft.html
Disallow: /*.tmp.html
Disallow: /temp/
Disallow: /backup/
Disallow: /_drafts/
Disallow: /_private/
Disallow: /admin/
Disallow: /private/
Disallow: /logs/
Disallow: /cache/

# Block machine-readable files that shouldn't appear in search results.
# The sitemap is explicitly re-allowed below so crawlers can still fetch
# it; robots.txt itself needs no Disallow rule, since crawlers must
# fetch it regardless.
Disallow: /*.json$
Disallow: /*.xml$
Disallow: /*.txt$
Allow: /sitemap.xml

# Block parameter-based URLs to prevent duplicate content.
# /*?* already covers the specific parameter rules below; they are
# kept for readability.
Disallow: /*?*
Disallow: /*?utm_*
Disallow: /*?source=*
Disallow: /*?ref=*
Disallow: /*?page=*
Disallow: /*?sort=*
Disallow: /*?filter=*

# Block search and utility pages
Disallow: /pages/search.html
Disallow: /pages/404.html
Disallow: /index-optimized.html

# Crawl delay to prevent server overload
Crawl-delay: 1

# ===================================================================
# GOOGLE SPECIFIC CONFIGURATION
# ===================================================================
User-agent: Googlebot

# Allow Googlebot full access to content pages
Allow: /
Allow: /categories/
Allow: /pages/generators/
Allow: /pages/validators/
Allow: /pages/categories/

# Block only sensitive files and directories
Disallow: /src/
Disallow: /clear-history.html
Disallow: /*.draft.html
Disallow: /temp/
Disallow: /backup/

# No Crawl-delay for Googlebot: Google ignores the directive and
# manages its own crawl rate
# Crawl-delay: 0

# ===================================================================
# GOOGLE IMAGES CONFIGURATION
# ===================================================================
User-agent: Googlebot-Image

# Allow access to images for image search
Allow: /assets/
Allow: /*.jpg
Allow: /*.jpeg
Allow: /*.png
Allow: /*.gif
Allow: /*.svg
Allow: /*.webp

# Block sensitive images
Disallow: /assets/admin/
Disallow: /assets/private/

# ===================================================================
# BING SPECIFIC CONFIGURATION
# ===================================================================
User-agent: Bingbot

# Allow access to main content
Allow: /
Allow: /categories/
Allow: /pages/

# Block sensitive areas
Disallow: /src/
Disallow: /clear-history.html
Disallow: /temp/
Disallow: /backup/
Disallow: /*.draft.html

# Moderate crawl delay for Bing
Crawl-delay: 2

# ===================================================================
# BAIDU SPECIFIC CONFIGURATION
# ===================================================================
User-agent: Baiduspider

# Allow access to main pages
Allow: /
Allow: /categories/
Allow: /pages/generators/
Allow: /pages/validators/

# Block non-Chinese-specific content if needed
Disallow: /pages/categories/english-only/

# Longer crawl delay for Baidu
Crawl-delay: 3

# ===================================================================
# YANDEX SPECIFIC CONFIGURATION
# ===================================================================
User-agent: YandexBot

# Allow access to main content
Allow: /
Allow: /categories/
Allow: /pages/

# Block sensitive areas
Disallow: /src/
Disallow: /clear-history.html
Disallow: /temp/

# Moderate crawl delay
Crawl-delay: 2
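
# ===================================================================
# PATTERN MATCHING EXAMPLES (informational comments only)
# ===================================================================
# A few illustrative, hypothetical URLs showing how the global rules
# above are evaluated under longest-match precedence (RFC 9309); the
# file paths are examples only and need not exist on this site:
#
#   /pages/generators/random.html        -> allowed  (Allow: /pages/)
#   /pages/generators/random.html?ref=x  -> blocked  (Disallow: /*?*)
#   /src/app.js                          -> blocked  (Disallow: /src/)
#   /data/export.json                    -> blocked  (Disallow: /*.json$)
#   /sitemap.xml                         -> allowed  (Allow: /sitemap.xml
#                                           is longer, so it overrides
#                                           Disallow: /*.xml$)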
# ===================================================================
# SOCIAL MEDIA CRAWLERS
# ===================================================================
User-agent: facebookexternalhit
Allow: /
Allow: /categories/
Allow: /pages/
# Allow social media crawlers to generate rich previews

User-agent: Twitterbot
Allow: /
Allow: /categories/
Allow: /pages/
# Allow Twitter crawler for card generation

# ===================================================================
# PERFORMANCE MONITORING CRAWLERS
# ===================================================================
User-agent: SemrushBot
# Allow limited access for SEO monitoring
Allow: /categories/
Allow: /pages/
Crawl-delay: 5

User-agent: AhrefsBot
# Allow limited access
Allow: /
Crawl-delay: 5

User-agent: MJ12bot
# Block this aggressive crawler
Disallow: /

# ===================================================================
# UNWANTED CRAWLERS - Block completely
# ===================================================================
User-agent: ia_archiver
Disallow: /

User-agent: BacklinkCrawler
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: BUbiNG
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: GrapeshotCrawler
Disallow: /

User-agent: MassDownloader
Disallow: /

User-agent: MegaIndex.ru
Disallow: /

User-agent: SemrushBot-SA
Disallow: /

User-agent: Shopwiki
Disallow: /

User-agent: XenuBot
Disallow: /

# ===================================================================
# SPECIAL DIRECTIVES
# ===================================================================
# Legacy Host directive, historically read by Yandex; Yandex
# deprecated it in 2018 and modern crawlers ignore it
Host: https://dgyy.github.io/Password-Generator/

# ===================================================================
# SITEMAP CONFIGURATION
# ===================================================================
# Main sitemap
Sitemap: https://dgyy.github.io/Password-Generator/sitemap.xml

# Additional sitemaps if available (commented out for future use)
# Sitemap: https://dgyy.github.io/Password-Generator/sitemap-generators.xml
# Sitemap: https://dgyy.github.io/Password-Generator/sitemap-validators.xml
# Sitemap: https://dgyy.github.io/Password-Generator/sitemap-categories.xml

# ===================================================================
# END OF ROBOTS.TXT
# ===================================================================
# Notes:
# - This robots.txt is optimized for a password generator website
# - Search engines are guided to focus on valuable content
# - Sensitive and duplicate content is protected from crawling
# - Crawl delays reduce server load (note: Google ignores Crawl-delay)
# - Social media crawlers can generate rich previews
# - Unwanted crawlers are blocked completely
# - The Sitemap directive helps search engine discovery
#
# For questions about this configuration, contact: wkrealmadrid@hotmail.com
# Last review: 2025-10-25
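
# ===================================================================
# VERIFICATION SKETCH (informational comments only)
# ===================================================================
# A minimal way to sanity-check this file locally is Python's
# standard-library urllib.robotparser; the snippet below is a
# hypothetical usage sketch quoted inside comments, not part of the
# robots.txt syntax, and /src/app.js is an example path only:
#
#   from urllib.robotparser import RobotFileParser
#   rp = RobotFileParser()
#   rp.set_url("https://dgyy.github.io/Password-Generator/robots.txt")
#   rp.read()
#   # Plain prefix rules are evaluated as expected:
#   assert rp.can_fetch("*", "https://dgyy.github.io/Password-Generator/pages/")
#   assert not rp.can_fetch("*", "https://dgyy.github.io/Password-Generator/src/app.js")
#   print(rp.crawl_delay("Bingbot"))  # -> 2
#
# Caveat: urllib.robotparser follows the original 1994 convention and
# treats * and $ in paths literally, so the wildcard rules above are
# not evaluated the way Google or Bing evaluate them; use each search
# engine's own robots.txt tester for those.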