%
'This ASP script originally lived at http://evolvedcode.net/ for the
'original version of this script and a wide variety of other
'scripts, please visit the site.
'
'Last update: 2004.04.21 14:41:01
Sub DenyCrawler()
'Code which blocks any requests from known "bad" crawlers or NULL user-agents
Dim sUserAgent
sUserAgent = Trim( Request.ServerVariables("HTTP_USER_AGENT") )
'Check if the request is coming from something which is less than desirable
If EmptyUA_Test( sUserAgent ) Or BadUA_Test( sUserAgent ) Or BadIP_Test( Request.ServerVariables("Remote_Addr") ) Then
Response.Status = "403 Access Denied"
%>
<%= Response.Status %>
<%= Response.Status %>
Access to the requested resource has been denied.
We suspect you are using an automated process to access this website. Please retry with a regular browser.
<%
Response.End
End If
End Sub
Function TestRegExp( ByVal sInput, ByVal sRegExp )
'Code to evaluate a regular expression
Dim objRegular
Set objRegular = New RegExp
objRegular.Pattern = sRegExp
objRegular.IgnoreCase = True
TestRegExp = objRegular.Test( sInput )
Set objRegular = Nothing
End Function
Sub UA_Add( ByVal sNewUserAgent, ByRef sUserAgents )
'Code to add an extra user-agent into a list suitable for
' parsing with a regular expression
If sUserAgents = vbNullString Then
sUserAgents = sNewUserAgent
Else
sUserAgents = sUserAgents & "|" & sNewUserAgent
End If
End Sub
Function EmptyUA_Test( ByVal sUserAgent )
'Code to check if a UA is an empty or v. small piece of text
EmptyUA_Test = False
sUserAgent = Trim( sUserAgent )
If sUserAgent = vbNullString Or Len( sUserAgent ) = 1 Then
EmptyUA_Test = True
End If
End Function
Function BadUA_Test( ByVal sUserAgent )
'Code to check if UA appears to be in a list of badly behaved agents, e-mail harvesting
' robots, offline readers and other undesirable crawlers
Dim sUserAgentList
BadUA_Test = False
'Build up a list of spambots and other undesirable crawlers using a mix of
' both known bad crawlers and keyword matching to detect new crawlers and
' variations on a theme
UA_Add "^Mozilla/\d\.\d\s\(compatible;\sAdvanced\sEmail\sExtractor\sv\d\.\d+\)$", sUserAgentList
UA_Add "CherryPicker", sUserAgentList
UA_Add "Crescent", sUserAgentList
UA_Add "^DA\s\d\.\d+$", sUserAgentList
UA_Add "^Mozilla/\d\.\d\s\(compatible;\sMSIE\s\d\.\d;\sWindows\sNT;\sDigExt;\sDTS\sAgent$", sUserAgentList
UA_Add "EasyDL/\d\.\d+", sUserAgentList
UA_Add "e-collector", sUserAgentList
UA_Add "EmailCollector", sUserAgentList
UA_Add "^EmailSiphon$", sUserAgentList
UA_Add "EmailWolf", sUserAgentList
UA_Add "ExtractorPro", sUserAgentList
UA_Add "Go!Zilla", sUserAgentList
UA_Add "GetRight/\d.\d", sUserAgentList
UA_Add "^ia_archiver$", sUserAgentList
UA_Add "Indy\sLibrary", sUserAgentList
UA_Add "larbin", sUserAgentList
UA_Add "MSIECrawler", sUserAgentList
UA_Add "Microsoft\sURL\sControl", sUserAgentList
UA_Add "NEWT\sActiveX", sUserAgentList
UA_Add "NICErsPRO", sUserAgentList
UA_Add "RealDownload/\d\.\d\.\d\.\d", sUserAgentList
UA_Add "Teleport", sUserAgentList
UA_Add "Telesoft", sUserAgentList
UA_Add "UtilMind\sHTTPGet", sUserAgentList
UA_Add "WebBandit", sUserAgentList
UA_Add "webcollage/\d\.\d\d", sUserAgentList
UA_Add "WebCopier\sv\d\.\d", sUserAgentList
UA_Add "WebEMailExtrac", sUserAgentList
UA_Add "WebZIP", sUserAgentList
UA_Add "^WGet/\d\.\d", sUserAgentList
UA_Add "^Zeus.+Webster", sUserAgentList
UA_Add "^Mozilla/3\.Mozilla/2\.01\s\(Win95;\sI\)$", sUserAgentList
UA_Add "^Internet\sExplorer\s?\d?\.?\d?$", sUserAgentList
UA_Add "^IE\s\d\.\d\sCompatible.*Browser$", sUserAgentList
UA_Add "^Microsoft\sInternet\sExplorer/4\.40\.426\s\(Windows\s95\)$", sUserAgentList
UA_Add "^SurveyBot/\d\.\d\s(Whois\sSource|\(Whois\sSource\))$", sUserAgentList
UA_Add "^Mozilla/4\.0\s\(?hhjhj@yahoo\.com\)?$", sUserAgentList
UA_Add "^MSIE", sUserAgentList
UA_Add "^Mozilla$", sUserAgentList
UA_Add "^Mozilla(\\|/)\?\?$", sUserAgentList
UA_Add "^Internet\sExplore\s?\d?\.?[a-z0-9]+$", sUserAgentList
UA_Add "^IAArchiver-\d\.\d$", sUserAgentList
UA_Add "^NPBot(-\d/\d\.\d)?(\s\(http://www\.nameprotect\.com/botinfo\.html\))?$", sUserAgentList
UA_Add "^Webclipping\.com$", sUserAgentList
UA_Add "^Mozilla/\d\.\d\s\(X11;\sLinux\si686;\sen-US;\srv:\d.\d[a-z0-9]*;\sOBJR\)$", sUserAgentList
UA_Add "^Sqworm/\d\.\d\.\d\d-BETA\s\(beta_release;\s\d{8}-\d{3};\si\d{3}-pc-linux-gnu\)$", sUserAgentList
UA_Add "^Lickity_Split/\d\.\d$", sUserAgentList
UA_Add "^Production\sBot\s\d+B$", sUserAgentList
UA_Add "^amzn_assoc$", sUserAgentList
UA_Add "^Harvest", sUserAgentList
UA_Add "^Webdup/\d\.\d$", sUserAgentList
UA_Add "^WebIndex/\d\.\d[a-z]$", sUserAgentList
UA_Add "(^|\s)RPT-HTTPClient/\d\.\d-\d$", sUserAgentList
UA_Add "^sitecheck\.internetseer\.com\s\(For\smore\sinfo\ssee:\shttp://sitecheck\.internetseer\.com\)$", sUserAgentList
UA_Add "^vspider$", sUserAgentList
UA_Add "^k2spider$", sUserAgentList
UA_Add "^Mac\sFinder\s", sUserAgentList
UA_Add "^ICU\sv", sUserAgentList
UA_Add "^DART$", sUserAgentList
UA_Add "^Mozilla/\d\.\d\s\(compatible;\sMSIE\s\d\.\d;\sWindows\sNT\s\d\.\d;\sQ\d{6};\s\.NET\sCLR\s\d\.\d\.\d{4}$", sUserAgentList
UA_Add "^COMBOMANIA$", sUserAgentList
UA_Add "^MyCrawler$", sUserAgentList
UA_Add "^Mozilla/\d\.\d\s\(compatible;\sWin32;\sWinHttp\.WinHttpRequest\.\d\)$", sUserAgentList
UA_Add "^WEP\sSearch\s\d+$", sUserAgentList
UA_Add "^Mozilla/\d\.\d\s\(fantomBrowser\)$", sUserAgentList
UA_Add "^TE$", sUserAgentList
UA_Add "^WebStripper/\d\.\d\d$", sUserAgentList
UA_Add "^OWR_Crawler\s\d\.\d$", sUserAgentList
UA_Add "^WebMiner/\d\.\d\s\[en\]\s\(Win\d\d;\sI\)$", sUserAgentList
UA_Add "^WebGather\s\d\.\d$", sUserAgentList
UA_Add "^readwebpage$", sUserAgentList
UA_Add "^InstantSSL\sBrowser:\slow\scost\sfully\svalidated\sSSL\s\+\sfree\strial$", sUserAgentList
UA_Add "^Mozilla/\d\.\d\s\(compatible;\sHTTrack\s2\.0x;\sWindows\s.+\)$", sUserAgentList
UA_Add "^Mozilla/4\.0\s\(compatible;\sPowermarks/\d\.\d;\sWindows\s.+\)$", sUserAgentList
UA_Add "^Vivante\sLink\sChecker\s\(http://www\.vivante\.com\)$", sUserAgentList
UA_Add "^Mozilla/\d\.\d\s\(compatible;\sWindows\sNT\s\d\.\d;\sABN\sAMRO$", sUserAgentList
UA_Add "^Mozilla/\d\.\d\s\(compatible;\sIntelliseek;\shttp://www\.intelliseek\.com\)$", sUserAgentList
UA_Add "^WebCopier\sSession\s\d$", sUserAgentList
UA_Add "^Mozilla/\d\.\d\s\(compatible;\sMSIE\s\d\.\d\d;\sWindows\s\d\d$", sUserAgentList
UA_Add "^Art-Online\.com\s\d\.\d\(Beta\)$", sUserAgentList
UA_Add "^WebGo\s", sUserAgentList
UA_Add "^SuperBot/\d\.\d\s\(Win\d\d\)$", sUserAgentList
UA_Add "^Download\sNinja\s\d\.\d$", sUserAgentList
UA_Add "^Mozilla/\d\.\d\s\(compatible;\sMSIE\s\d\.\d;\sWindows\sNT\s\d\.\d;\s\.NET\sCLR\s\d\.\d\.\d{4}$", sUserAgentList
UA_Add "^Expired\sDomain\sSleuth$", sUserAgentList
UA_Add "^SHARP-TQ-GX\d\d$", sUserAgentList
UA_Add "^HTTP/\d\.\d\sMozilla/\d\.\d\+\(compatible;\+MSIE\+\d\.\d;\+Windows\+NT\+\d\.\d\)$", sUserAgentList
UA_Add "^.+/\d+\.\d+\s\(Version:\s\d+\sType:\d+\)$", sUserAgentList
UA_Add "^Offline\sExplorer/\d+\.\d+$", sUserAgentList
If sUserAgentList = vbNullString Then
sUserAgentList = "^nulldata$"
End If
BadUA_Test = TestRegExp(sUserAgent, sUserAgentList)
End Function
Function BadIP_Test( ByVal sIPAddr )
'Code to check if an IP appears to be in a our list
Dim sIPList
BadIP_Test = False
'Assemble the list
'UA_Add "^127\.0\.0\.1$", sIPList 'Block a single IP, 127.0.0.1
'UA_Add "^127\.0\.0\.", sIPList 'Block any IP starting with 127.0.0.
'UA_Add "^127\.0\.", sIPList 'Block any IP starting with 127.0.
If sIPList = vbNullString Then
sIPList = "^nulldata$"
End If
BadIP_Test = TestRegExp( sIPAddr, sIPList )
End Function
'--Stand-alone testing code--
'DenyCrawler()
'Response.Write "If you can see this the script works and you aren't listed as a bad IP or UA."
'--Stand-alone testing code--
%>