# robots.txt - crawler exclusion rules for this site.
#
# If the Joomla site is installed within a folder, e.g. at
# www.example.com/joomla/, the robots.txt file MUST be
# moved to the site root at e.g. www.example.com/robots.txt
# AND the joomla folder name MUST be prefixed to the disallowed
# path, e.g. the Disallow rule for the /administrator/ folder
# MUST be changed to read Disallow: /joomla/administrator/
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/orig.html
#
# For syntax checking, see:
# http://www.sxw.org.uk/computing/robots/check.html
#
# Formatting rules this file relies on:
#   * one directive per line ("#" comments out the rest of a line)
#   * groups are separated by a blank line
#   * every Disallow value is a path relative to the site root and
#     starts with "/" (or "*"); host names are not allowed in a rule

# ---------------------------------------------------------------------------
# Rules for all crawlers.
# Kept as a single "User-agent: *" group so that parsers which only honour
# the first matching group still see every rule.
# GSA is our primary crawler but logs indicate there may be others on our
# Intranet.
# ---------------------------------------------------------------------------
User-agent: *
#Crawl-delay: 5   # per http://en.wikipedia.org/wiki/Robots.txt#Nonstandard_extensions, sets number of seconds to wait between requests to 5 seconds. may not work
Disallow: /download/temp/
Disallow: /plugins/viewsource/
# "*" and "$" are wildcard extensions; the pattern must begin with "/" or "*"
Disallow: /*?reload=true$
Disallow: /*?rCH=2
Disallow: /*?rCH=-2
# Blocks every URL containing a query string (also covers the three rules above)
Disallow: /*?

# Note: this file uses parameters specific to Google, parameters that are not in the robots.txt standard
# http://www.google.com/support/webmasters/, http://www.robotstxt.org/wc/faq.html and http://en.wikipedia.org/wiki/Robots_Exclusion_Standard were used to research said parameters
# some links shouldn't show to an anonymous browser such as GSA but are included for completeness
Disallow: /news/mediaportal-1-13-0-release
Disallow: /pages/                                   # this line to purge GSA of all old page entries, will be removed in next iteration so that specific /pages/ lines below take effect
Disallow: /admin/                                   # administrator links
Disallow: /adminstrators.action?                    # remove any administrator links (spelling kept from the original rule)
Disallow: /administrators.action?                   # NOTE(review): presumably the intended spelling of the rule above - confirm against the application's URLs
Disallow: /createrssfeed.action?                    # remove internal RSS links
Disallow: /dashboard.action?                        # remove the dashboard, heavy resource hit
Disallow: /doexportpage.action?                     # remove pdf export links
Disallow: /dopeopledirectorysearch.action           # people search
Disallow: /dosearchsite.action?                     # remove specific site searches
Disallow: /exportword?                              # remove word export links
Disallow: /login.action?                            # remove the login page
# Allow: /pages/viewpage.action?*                   # allows indexing of pages with invalid titles for html (such as ?'s). Unfortunately currently allows page history to sneak in
# Specific /pages/ rules; redundant while the blanket "Disallow: /pages/" above is present
Disallow: /pages/copypage.action?                   # remove copy page links
Disallow: /pages/createblogpost.action?             # remove add news links
Disallow: /pages/createpage.action?                 # remove add page links
Disallow: /pages/diffpages.action?                  # remove page comparison pages
Disallow: /pages/diffpagesbyversion.action?         # remove page comparison links
Disallow: /pages/editblogpost.action?               # remove edit news links
Disallow: /pages/editpage.action?                   # remove edit page links
Disallow: /pages/removepage.action?                 # remove the remove page links
Disallow: /pages/revertpagebacktoversion.action?    # remove reversion links
Disallow: /pages/templates                          # remove template pages
Disallow: /pages/templates/                         # block template indexes
Disallow: /pages/viewchangessincelastlogin.action?  # remove page comparison pages
Disallow: /pages/viewpagesrc.action?                # remove view page source links
Disallow: /pages/viewpreviouspageversions.action?   # remove the link to previous versions
Disallow: /plugins/                                 # blocks plug-in calls
Disallow: /rpc/                                     # remove any RPC links
Disallow: /searchsite.action?                       # remove the wiki search engine pages
Disallow: /spaces/                                  # remove space action pages
Disallow: /themes/                                  # theme links
Disallow: /users/                                   # remove user action pages
Disallow: /x/                                       # remove tiny link urls

# ---------------------------------------------------------------------------
# Crawler-specific groups.
# ---------------------------------------------------------------------------
User-agent: Googlebot-Image
Disallow: /images/stories/homepageV3/

User-agent: SiteAuditBot
Disallow: /

User-agent: SemrushBot-BA
Disallow: /

User-agent: SemrushBot-SI
Disallow: /

User-agent: SemrushBot-SWA
Disallow: /

User-agent: SemrushBot-CT
Disallow: /

User-agent: SemrushBot-BM
Disallow: /

User-agent: SplitSignalBot
Disallow: /

User-agent: Mediapartners-Google
# Rule values are paths, not URLs: the host name must not be part of the rule
Disallow: /extensions/movies-videos

# End file