# robots.txt for Colnect Collectors Community - https://colnect.com
#
# WARNING! Collection of data on Colnect by using automated means is strictly prohibited without written consent from Colnect
#
# Note you may apply for CAPI (Colnect API) license
#
# Happy collecting :)
#
# Format reminder: one record per line. A "#" comments out the REST of the
# physical line, so a directive must never share a line with a preceding comment.

# Per-language sitemaps (Sitemap records are independent of user-agent groups).
Sitemap: https://colnect.com/s/sitemap_en.xml.gz
Sitemap: https://colnect.com/s/sitemap_af.xml.gz
Sitemap: https://colnect.com/s/sitemap_sq.xml.gz
Sitemap: https://colnect.com/s/sitemap_ar.xml.gz
Sitemap: https://colnect.com/s/sitemap_az.xml.gz
Sitemap: https://colnect.com/s/sitemap_bn.xml.gz
Sitemap: https://colnect.com/s/sitemap_bg.xml.gz
Sitemap: https://colnect.com/s/sitemap_be.xml.gz
Sitemap: https://colnect.com/s/sitemap_ca.xml.gz
Sitemap: https://colnect.com/s/sitemap_hr.xml.gz
Sitemap: https://colnect.com/s/sitemap_cs.xml.gz
Sitemap: https://colnect.com/s/sitemap_da.xml.gz
Sitemap: https://colnect.com/s/sitemap_nl.xml.gz
Sitemap: https://colnect.com/s/sitemap_et.xml.gz
Sitemap: https://colnect.com/s/sitemap_fi.xml.gz
Sitemap: https://colnect.com/s/sitemap_fr.xml.gz
Sitemap: https://colnect.com/s/sitemap_fy.xml.gz
Sitemap: https://colnect.com/s/sitemap_ka.xml.gz
Sitemap: https://colnect.com/s/sitemap_de.xml.gz
Sitemap: https://colnect.com/s/sitemap_el.xml.gz
Sitemap: https://colnect.com/s/sitemap_he.xml.gz
Sitemap: https://colnect.com/s/sitemap_hi.xml.gz
Sitemap: https://colnect.com/s/sitemap_hu.xml.gz
Sitemap: https://colnect.com/s/sitemap_id.xml.gz
Sitemap: https://colnect.com/s/sitemap_it.xml.gz
Sitemap: https://colnect.com/s/sitemap_ja.xml.gz
Sitemap: https://colnect.com/s/sitemap_ko.xml.gz
Sitemap: https://colnect.com/s/sitemap_lv.xml.gz
Sitemap: https://colnect.com/s/sitemap_lt.xml.gz
Sitemap: https://colnect.com/s/sitemap_ms.xml.gz
Sitemap: https://colnect.com/s/sitemap_no.xml.gz
Sitemap: https://colnect.com/s/sitemap_fa.xml.gz
Sitemap: https://colnect.com/s/sitemap_pl.xml.gz
Sitemap: https://colnect.com/s/sitemap_pt.xml.gz
Sitemap: https://colnect.com/s/sitemap_ro.xml.gz
Sitemap: https://colnect.com/s/sitemap_ru.xml.gz
Sitemap: https://colnect.com/s/sitemap_sr.xml.gz
Sitemap: https://colnect.com/s/sitemap_si.xml.gz
Sitemap: https://colnect.com/s/sitemap_sk.xml.gz
Sitemap: https://colnect.com/s/sitemap_sl.xml.gz
Sitemap: https://colnect.com/s/sitemap_es.xml.gz
Sitemap: https://colnect.com/s/sitemap_sv.xml.gz
Sitemap: https://colnect.com/s/sitemap_tl.xml.gz
Sitemap: https://colnect.com/s/sitemap_th.xml.gz
Sitemap: https://colnect.com/s/sitemap_tr.xml.gz
Sitemap: https://colnect.com/s/sitemap_uk.xml.gz
Sitemap: https://colnect.com/s/sitemap_ur.xml.gz
Sitemap: https://colnect.com/s/sitemap_br.xml.gz
Sitemap: https://colnect.com/s/sitemap_zt.xml.gz

# Ad verification / contextual classification crawlers
# PubMax requested these ad-related bots to be allowed
# (allowed everywhere except account pages; no blank lines inside the group,
# because some parsers treat a blank line as the end of a group)
User-agent: proximic
User-agent: grapeshot
User-agent: WeSEE
User-agent: ias_crawler
User-agent: ias_wombles
User-agent: AudigentAdBot
User-agent: CriteoBot
#User-agent: AhrefsBot
#User-agent: SemrushBot
#User-agent: MJ12bot
#User-agent: dotbot
#User-agent: BrandVerity/1.0
# ChatGPT recommended these to be added
User-agent: moatbot
User-agent: DoubleVerifyBot
User-agent: DVBot
User-agent: Google-InspectionTool
User-agent: GoogleOther
User-agent: Pinterestbot
Crawl-delay: 2
Disallow: /*/account/

# msnbot gone berserk, hopefully temporary, commented out 27/5/2020
#User-agent: msnbot
#Disallow: /
#User-agent: bingbot
#Disallow: /
#User-agent: BingPreview
#Disallow: /
#User-agent: AdIdxBot
#Disallow: /

# Block Facebook from previewing pages (currently disabled)
#User-agent: facebookexternalhit
#Disallow: /

# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
# Fully blocked crawlers, followed by the generated rule set for search engines
# and for everyone else.
# Layout rules for this file: one record per line; consecutive User-agent lines
# form one group that shares the rule lines following them; no blank lines
# inside a group (some parsers treat a blank line as the end of the group).
# NOTE(review): several User-agent values below contain spaces, "/", "(" or a
# typographic apostrophe. Parsers that follow RFC 9309 only accept product
# tokens made of letters, "_" and "-", so those entries may never match - confirm.
User-agent: Baiduspider
User-agent: Baiduspider-video
User-agent: Baiduspider-image
User-agent: UbiCrawler
User-agent: BUbiNG
User-agent: DOC
User-agent: Zao
User-agent: Arquivo-web-crawler
User-agent: Verity
User-agent: AspiegelBot
User-agent: leakix.net
# Seems legit but for 10 visits a month paying in thousands of daily requests is too much
User-agent: DAUM
Disallow: /

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
User-agent: Zealbot
User-agent: MSIECrawler
User-agent: SiteSnagger
User-agent: WebStripper
User-agent: WebCopier
User-agent: Fetch
User-agent: Offline Explorer
User-agent: Teleport
User-agent: TeleportPro
User-agent: WebZIP
User-agent: linko
User-agent: HTTrack
User-agent: Microsoft.URL.Control
User-agent: Xenu
User-agent: larbin
User-agent: libwww
User-agent: ZyBORG
User-agent: Download Ninja
User-agent: fast
User-agent: MLBot
Disallow: /

# The agents below have no rule line of their own: they belong to the same
# group as Fasterfox and share its "Disallow: /".
User-agent: 008
User-agent: NextGenSearchBot
User-agent: Java/1.6.0_10
User-agent: Spinn3r
User-agent: CrazyWebCrawler-Spider
User-agent: wget
User-agent: grub-client
User-agent: k2spider
User-agent: NPBot
User-agent: WebReaper
User-agent: magpie-crawler
# Ask Fasterfox plugin not to prefetch
User-agent: Fasterfox
Disallow: /

# Makes a lot of wrong requests - ignores this directive so it's totally blocked
User-agent: JikeSpider
Disallow: /

# Makes a lot of wrong requests
User-agent: YYSpider
Disallow: /

# More useless bots
User-agent: TwengaBot
Disallow: /

User-agent: TosCrawler
Disallow: /

# This group runs through Heritrix and shares the single "Disallow: /" there.
User-agent: SemrushBot-SA
User-agent: BLEXBot
User-agent: CamontSpider
User-agent: spbot
User-agent: CCBot
User-agent: BDCbot
User-agent: MauiBot
User-agent: VelenPublicWebCrawler
# More bad SEO bots - taken from https://www.shoutmeloud.com/what-is-robots-txt-file-and-how-to-optimize-for-wordpress-blogs.html
User-agent: Alexibot
User-agent: SurveyBot
User-agent: Xenu’s
User-agent: Xenu’s Link Sleuth 1.1c
User-agent: rogerbot
User-agent: ia_archiver
User-agent: archive.org_bot
User-agent: Archive.org Bot
User-agent: LinkWalker
User-agent: GigaBlast Spider
User-agent: ia_archiver-web.archive.org
User-agent: PicScout
User-agent: TinEye
User-agent: SEOkicks-Robot
User-agent: SISTRIX Crawler
User-agent: Ezooms Robot
User-agent: netEstate NE Crawler (+http://www.website-datenbank.de/)
User-agent: WiseGuys Robot
User-agent: Turnitin Robot
User-agent: Heritrix
Disallow: /

# Block pricepi
User-agent: pimonster
User-agent: Pimonster
User-agent: Pi-Monster
Disallow: /

# Block Eniro
User-agent: ECCP/1.0 (search@eniro.com)
User-agent: Psbot
User-agent: YoudaoBot
User-agent: NaverBot
User-agent: Yeti
User-agent: ZBot
User-agent: Vagabondo
User-agent: SimplePie
User-agent: Pixray-Seeker
User-agent: BoardReader
User-agent: Quantify
User-agent: Plukkie
User-agent: Cuam
User-agent: ZoomBot
User-agent: trendictionbot
User-agent: AwarioRssBot
User-agent: AwarioSmartBot
Disallow: /

# no traffic from this "search engine"
User-agent: SeekportBot
Disallow: /

# https://megaindex.com/crawler
User-agent: MegaIndex.ru
User-agent: megaindex.com
User-agent: +http://megaindex.com/crawler
User-agent: MegaIndex.ru/2.0
User-agent: megaIndex.ru
Disallow: /

#User-agent: Applebot
#Disallow: /

# Assorted SEO / AI crawlers, fully blocked.
User-agent: Linespider
User-agent: hyscore
User-agent: DataForSeoBot
User-agent: MaxPointCrawler
User-agent: OAI-SearchBot
User-agent: barkrowler
User-agent: SenutoBot
User-agent: IbouBot
User-agent: TerraCotta
User-agent: GPTBot
User-agent: ChatGPT-User
Disallow: /

# NOTE(review) for whoever maintains the generator of the section below
# (rules inside the section are left exactly as generated):
#  - "Disallow: *.." does not start with "/"; RFC 9309 path patterns begin with
#    "/", so strict parsers may ignore it. "/*.." would be the compliant spelling.
#  - "/gu", "/ht", "/kk", "/ml", "/mk", "/sw", "/ta", "/te" have no trailing
#    slash (unlike "/pa/"), so they match every path with that prefix - confirm
#    this is intended.
#############################################################################
# BEGINNING AUTOMATED PART - generated 2025-11-15T00:12:21+08:00

User-agent: AdsBot-Google
User-agent: Mediapartners-Google
User-agent: bingbot
User-agent: YandexBot
Crawl-delay: 2
Disallow: *..
Disallow: /teletalk/
Disallow: /downloads/
Disallow: /forums/download/
Disallow: /main/set_language/
Disallow: /gu
Disallow: /ht
Disallow: /kk
Disallow: /ml
Disallow: /mk
Disallow: /pa/
Disallow: /sw
Disallow: /ta
Disallow: /te
Disallow: /*/self$
Disallow: /*/self/
Disallow: /*/new/
Disallow: /*/online/
Disallow: /*/edit/
Disallow: /*/login$
Disallow: /*/account$
Disallow: /*/account/
Disallow: /*/collectors/rate/
Disallow: /*/collectors/log/
Disallow: /*/collectors/list/friends/
Disallow: /*/collectors/list/marked_as_friend/
Disallow: /*/collectors/list/watchlist/
Disallow: /*/collectors/list/best_matches/
Disallow: /*/collectors/log_global/
Disallow: /*/collectors/mark/
Disallow: /*/collectors/my_profile
Disallow: /*/collectors/edit_log
Disallow: /*/collectors/translation_log
Disallow: /*/directory/
Disallow: /*/identify/
Disallow: /*/item/view_collectors/
Disallow: /*/main/
Disallow: /*/collectors/*/active/
Disallow: /*/cart/
Disallow: /*/seller/
Disallow: /*/transaction/
Disallow: /api/
Disallow: /*/api/
Disallow: /capi/
Disallow: /*/capi/
Disallow: /fld/
Disallow: /*/fld/
Disallow: /integrations/
Disallow: /*/integrations/
Disallow: /tool/
Disallow: /*/tool/
Disallow: /*unapproved/
Disallow: /*collection/
Disallow: /*swap_list/
Disallow: /*wish_list/
Disallow: /*ignore/
Disallow: /*buy_list/
Disallow: /*sell_list/
Disallow: /*custom_list
Allow: /*by_collection/
Allow: /*by_swap_list/
Allow: /*by_wish_list/

#User-agent: SeznamBot
#User-agent: Qwantbot
#User-agent: Bytespider
#User-agent: PetalBot
#User-agent: coccocbot-web
#User-agent: TikTokSpider
#User-agent: meta-externalagent
User-agent: *
Crawl-delay: 10
Disallow: *..
Disallow: /teletalk/
Disallow: /downloads/
Disallow: /forums/download/
Disallow: /main/set_language/
Disallow: /gu
Disallow: /ht
Disallow: /kk
Disallow: /ml
Disallow: /mk
Disallow: /pa/
Disallow: /sw
Disallow: /ta
Disallow: /te
Disallow: /*/self$
Disallow: /*/self/
Disallow: /*/new/
Disallow: /*/online/
Disallow: /*/edit/
Disallow: /*/login$
Disallow: /*/account$
Disallow: /*/account/
Disallow: /*/collectors/rate/
Disallow: /*/collectors/log/
Disallow: /*/collectors/list/friends/
Disallow: /*/collectors/list/marked_as_friend/
Disallow: /*/collectors/list/watchlist/
Disallow: /*/collectors/list/best_matches/
Disallow: /*/collectors/log_global/
Disallow: /*/collectors/mark/
Disallow: /*/collectors/my_profile
Disallow: /*/collectors/edit_log
Disallow: /*/collectors/translation_log
Disallow: /*/directory/
Disallow: /*/identify/
Disallow: /*/item/view_collectors/
Disallow: /*/main/
Disallow: /*/collectors/*/active/
Disallow: /*/cart/
Disallow: /*/seller/
Disallow: /*/transaction/
Disallow: /api/
Disallow: /*/api/
Disallow: /capi/
Disallow: /*/capi/
Disallow: /fld/
Disallow: /*/fld/
Disallow: /integrations/
Disallow: /*/integrations/
Disallow: /tool/
Disallow: /*/tool/
Disallow: /*unapproved/
Disallow: /*collection/
Disallow: /*swap_list/
Disallow: /*wish_list/
Disallow: /*ignore/
Disallow: /*buy_list/
Disallow: /*sell_list/
Disallow: /*custom_list
Allow: /*by_collection/
Allow: /*by_swap_list/
Allow: /*by_wish_list/

# END AUTOMATED PART - generated 2025-11-15T00:12:21+08:00
#############################################################################