# Robots.txt file created by http://www.webtoolcentral.com
# Checked by: http://tool.motoricerca.info/robots-checker.phtml
# Alternate creator: http://www.mcanerin.com/EN/search-engine/robots-txt.asp
# For domain: http://www.sethi.org
# Reference: http://pageresource.com/zine/robotstxt.htm and http://www.searchtools.com/robots/robots-txt.html
#
# Layout: one directive per line; each "User-agent" group is separated from
# the next by a blank line, and no blank line appears inside a group.
# A crawler obeys only the group whose User-agent matches it most
# specifically; the "User-agent: *" group at the bottom is the fallback.

# Rules that apply only to the Wayback Machine crawler (ia_archiver).
# See: http://www.archive.org/about/exclude.php
# Check for http://www.sethi.org/investments/darvas/darvas.phps
User-agent: ia_archiver
Disallow: /cgi-bin/
Disallow: /investments/
Disallow: /tools/
Disallow: /genealogy/

# Disallow wget also? (currently disabled; remove the leading "#;" to enable)
#;User-agent: wget
#;Disallow: /

# Disallow Scooter/1.0 (currently disabled)
#;User-agent: Scooter/1.0
#;Disallow: /

# Disallow Bilbo/1.2+WAP (currently disabled)
#;User-agent: Bilbo/1.2+WAP
#;Disallow: /

# The major search-engine bots below may fetch everything except /cgi-bin/.
# Because each has its own group, the extra exclusions and the Crawl-delay in
# the "User-agent: *" group at the bottom do NOT apply to them.
# (This section was generated at www.mcanerin.com.)

# Google
User-agent: Googlebot
Disallow: /cgi-bin/

# MSN
User-agent: MSNBot
Disallow: /cgi-bin/

# Yahoo
User-agent: Slurp
Disallow: /cgi-bin/

# Ask/Teoma
User-agent: Teoma
Disallow: /cgi-bin/

# DMOZ
User-agent: Robozilla
Disallow: /cgi-bin/

# Default rules for every robot not matched by a more specific group above.
User-agent: *
# Disallow directory /analog/
Disallow: /analog/
# Disallow directory /cgi-bin/
Disallow: /cgi-bin/
# Disallow directory /guestbook/ (the bots with their own group above are exempt)
Disallow: /guestbook/
# Disallow directory /utils/
Disallow: /utils/
# Crawl-delay is a non-standard directive; crawlers that honor it typically
# treat the value as a number of seconds to wait between requests.
Crawl-delay: 120