# Clean Feed
# Version 0.95 19 November 1997
#
# Spam filter for INN 1.5.1 and later
# by Jeremy Nixon
# originally based upon some code from Jeff Garzik
#
# Modified by Chris Siebenmann for operation
# with Narf (http://utcc.utoronto.ca/abuse/antispam-stuff/narf/).
# No longer works with INN.
#
# Install this file as filter_innd.pl, wherever your server expects
# to find that file (possibly /usr/news/bin/control).
#
# NOTE: this file deliberately runs without 'use strict'.  It shares
# package globals (%hdr, $::NARFVERSION, $::MaxArtSize) with the program
# that loads it, and its history tables (%EMP, %history, %bihist, ...)
# must survive between calls to filter_art().

# What version of narf we require
$MINVER = 0.93;

# Ensure that we have the necessary minimum version of Narf.
# Annoyingly we cannot just use die by itself, because the die's output
# will be swallowed by 'do'.
do {
    print STDERR "filter_innd.pl requires Narf version $MINVER or better: only have $::NARFVERSION.\n";
    die "narf version error";
} unless ($::NARFVERSION >= $MINVER);

use MD5;

$md5bi = 20;              # The deadly SBI for MD5 hashes. Note that
                          # for MD5 matches we compute the true SBI,
                          # not just count posts seen.
                          # BI 20 is standard, although SBI is not.
$BIHistSize = 10000;      # Keep the SBI history for the last this many
                          # articles around (give or take some margins)

$maxgroups = 14;          # maximum number of groups in a crosspost
$maxfollowups = 7;        # followups less than this is ok
$maxmultiposts = 20;      # start rejecting after this many copies
$ArticleHistSize = 4000;  # keep history of last N message ids
$EMPHistSize = 1000;      # number of EMP ids to hold in memory
$tjfmaxgroups = 6;        # max xposts in test, forsale, jobs groups
$verbose = 1;             # stick lots of cool stuff into news.notice?
$block_binaries = 0;      # set to 1 to block misplaced binaries
$max_encoded_lines = 10;  # number of encoded binary lines to allow
$block_mime_html = 1;     # set to 1 to block MIME encapsulated HTML
                          # (NOT straight or multipart/alternative)
$block_html = 0;          # set to 1 to block HTML and multipart/alternative

# binaries allowed if groups match
$bin_allowed = '\.binaries|alt\.sex\.pictures|alt\.anonymous\.messages';

# Second-level domain names (regex alternation) treated as spam sources.
$badguys = 'eroticboxoffice|sweet18|kissingirls|erosnet|shockingpink|dominationx'.
           '|savetrees|porn4porn|blowme|24hrxxx|cherrypoppers|schoolparty'.
           '|freesex|xxxfreevideo|backdoor|hotheros|velocity|holowww|\w+\.holowww'.
           '|gigapix|6t9|nudesights|porngodess|phatt|rawxxxfun|pornking|hardsexjunkies'.
           '|pornomaster|xxxsexjunkie|sloppysex|sex-seek|hotorgasms|clitoris'.
           '|snatch|xratedx|the-love-shack|sensuality|lovetunnel|xsearchx|slutboys'.
           '|domxxx|peepingtoms|pervert|free-pics|steamy-hot|dreamlands|amxxx'.
           '|youwish|ilovecelebs|dirtysecrets|lynxxx|earthcalls|debutantes'.
           '|answerme|nudists|xxxforyou|emi|teentown|wildchild|ilovelez|beach-girls'.
           '|c-flash|sexjunky|nymphette|stobblehouse|latexfetish|harddicks|smuthole'.
           '|\w+\.mnet1|pictureview|lasersex|ictcom|babesofage|arg-inc|lick|planetofthebabes'.
           '|hotsexnow|bmc-engineering|nastygirlz|marys-place|ucla\.dorms'.
           '|animalistik|malebytes|southcorp|mansclub|eroticon6|inet-images'.
           '|legalteens|bootycamp|cafeflesh|sexzilla|jalapeno|orchidvideos';

# IP addresses / address prefixes (regex alternation) treated as spam sources.
$badips = '204\.248\.170\.93|207\.70\.214\.|208\.208\.223\.|209\.51\.194\.'.
          '|207\.156\.223\.|205\.147\.202\.|206\.161\.233\.228|208\.223\.114\.11'.
          '|208\.214\.26\.231';

# Posting hosts for which no NNTP-Posting-Host/Lines hash is kept.
$exempt = 'news\.aol\.com|news\.newsdawg\.com|webtv\.net|newscene\.newscene\.com'.
          '|hme1-1\.news\.sprint\.ca|news1\.mcps\.com|^localhost$';

# Top-level domains that may follow a $badguys name.
$tlds = 'com|net|org|edu|nl|de|no|dk|ch';

# Hand-rolled case-insensitive fragments for matching inside article bodies.
$ci_begin = '[Bb][Ee][Gg][Ii][Nn]';
$ci_ctype = '[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]';
$ci_cte = '[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Rr][Aa][Nn][Ss][Ff][Ee][Rr]-[Ee][Nn][Cc][Oo][Dd][Ii][Nn][Gg]';
$ci_txht = '(?:[Tt][Ee]?[Xx][Tt]|[Hh][Tt][Mm][Ll]?)';
$ci_html = '[Tt][Ee][Xx][Tt]\/[Hh][Tt][Mm][Ll]';

# Character classes used to recognise base64 and uuencoded lines.
$base64_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+\/';
$uu_chars = q| `!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_|;
$uu_chars =~ s/(\W)/\\$1/g;    # escape every non-word char for use inside [...]

# Building blocks for the "services" Subject pattern.
$sex = 'sex|xxx|fuck';
$servPre = '(?:free|cheap|unlimited|nationwide|hard.?core)';
$servPost = '(?:free|minute|samples|800|900)';
$servStr = "(?:phone.{0,15}(?:$sex)|(?:adult|$sex).{0,10}(?:chat|site)".
           "|(?:$sex).{0,15}(?:shows|calls|connections|pi(?:cs|x|ctures))".
           "|hardcore video|900 dateline|mass e?-?mail)";
$services = "(?:$servPre.{0,30}?$servStr)|(?:$servStr.{0,30}?$servPost)";

# filter_art -- examine the article currently described by the global %hdr.
#
# Reads header values from %hdr (keyed by header name) and the article
# body from $hdr{"__BODY__"}; %hdr is filled in by the calling program,
# not by this file.
#
# Returns a non-empty string naming the rule that fired, or "" when no
# rule fired.  (Presumably the caller rejects on a non-empty string, as
# with INN's filter_art convention -- confirm against Narf.)
#
# Side effects: updates the persistent history tables @history/%history,
# @bihist/%bihist/%bistash and, via savehist(), @EMP_Hist/%EMP.
sub filter_art {
    if (! $hdr{"Control"}) {
        # hash1: (Sender or From) + Subject + Lines, lower-cased.
        if ($hdr{"Sender"}) {
            $hash1 = lc("$hdr{'Sender'} $hdr{'Subject'} $hdr{'Lines'}");
        } else {
            $hash1 = lc("$hdr{'From'} $hdr{'Subject'} $hdr{'Lines'}");
        }

        # hash2: NNTP-Posting-Host + Lines, skipped for exempt hosts and
        # for long articles in binaries-allowed groups.
        undef $hash2;
        unless ($hdr{"NNTP-Posting-Host"} =~ /(?:$exempt)/o ||
                ($hdr{"Newsgroups"} =~ /$bin_allowed/o && $hdr{'Lines'} > 999)) {
            $hash2 = "$hdr{'NNTP-Posting-Host'} $hdr{'Lines'}"
                if ($hdr{'NNTP-Posting-Host'});
        }

        # hash3: MD5 of the body plus its length (undef for huge bodies).
        undef $hash3;
        $hash3 = filter_bodyhash();

        # Anything already recorded as an EMP is rejected outright.
        return "EMP rejected" . ($verbose ? " (md5)" : "") if ($EMP{$hash3});
        return "EMP rejected" . ($verbose ? " (f/s/l)" : "") if ($EMP{$hash1});
        return "EMP rejected" . ($verbose ? " (ph/l)" : "") if ($EMP{$hash2});

        # Excessive crossposting, unless Followup-To narrows it down.
        ($hdr{"Newsgroups"} =~ /(?:,+[^,]+){$maxgroups,}?/)
            && ((! $hdr{"Followup-To"})
                || ($hdr{"Followup-To"} =~ /(?:,+[^,]+){$maxfollowups,}?/))
            && (return "Too many newsgroups");

        # The misspelling is the point: it is a known spammer typo.
        ($hdr{"Newsgroups"} =~ /alt\.binaires/)
            && (return "Misspelling trap");

        if ($block_binaries) {
            my @groups = split /,+/, $hdr{"Newsgroups"};
            # Only look for encoded data if some group is not a binaries group.
            if (grep !/$bin_allowed/o, @groups) {
                (($hdr{"__BODY__"} =~ /(?:^[\s>]*[$base64_chars]{59,76}\s*\n){$max_encoded_lines,}?/mo)
                 || ($hdr{"__BODY__"} =~ /(?:^[\s>]*M[$uu_chars]{60,61}\s*\n){$max_encoded_lines,}?/mo))
                    && (return "Binary in non-binary group");
            }
        }

        if ($block_html) {
            (lc($hdr{"Content-Type"}) =~ /text\/html|multipart\/alternative/)
                && (return "HTML post");
            # /m is required so that ^ can match a Content-Type line of an
            # inner MIME part, not only the very first line of the body.
            (lc($hdr{"Content-Type"}) =~ /multipart\/mixed/)
                && ($hdr{"__BODY__"} =~ /^$ci_ctype:\s$ci_html/mo)
                && (return "HTML post");
        }

        ($hdr{"Newsgroups"} =~ /(?:[^,]+\.(test|jobs|forsale).*,){$tjfmaxgroups,}?/)
            && (return $verbose ? "$1 group: too many crossposts" : "Too many newsgroups");

        if ($hdr{"Newsgroups"} !~ /news\.admin\.net-abuse/) {
            my $organization = lc($hdr{"Organization"});

            # Both patterns anchor the whole $badguys alternation with \b, so
            # short entries cannot match in the middle of a longer domain name.
            ((lc($hdr{"From"}) =~ /(\b(?:$badguys)\.(?:$tlds)\b|$badips)/o)
             || ($organization =~ /(\b(?:$badguys)\.(?:$tlds)\b|$badips)/o))
                && (return $verbose ? "Spam - $1" : "Spam domain");

            (($hdr{"Path"} =~ /news\.florida-isp\.com!/)
             || ($hdr{"NNTP-Posting-Host"} =~ /(?:florida-isp|escortguide|metrosupport)\.com/))
                && (return $verbose ? "Spam - florida-isp.com" : "Spam domain");

            ($organization =~ /adult\s+internet\s+access/)
                && (return $verbose ? "Spam - Adult Internet Access" : "Spam domain");

            (($organization =~ /(?:net|sex)zilla\.(?:net|com)|repost.*unauthorized.*cancel/)
             || ($hdr{"Message-ID"} =~ /netzilla\.net|sexzilla\.com|207\.70\.214\.|roo\.fosta\.au|rua\.kanuck\.ca|\@silicom\.omega\.es/)
             || ($hdr{"Path"} =~ /newsfeeds\.com|news\.jam\.com|news\.cm\.org!bcarh189\.bnr\.ca|bcarh8c\.bnr\.ca!(?:enzo\.grapa\.it|news\.primenet\.com)/))
                && (return $verbose ? "Spam - Netzilla" : "Spam domain");

            # A uuencoded attachment whose file name ends in a text/HTML suffix.
            ($hdr{"__BODY__"} =~ /^$ci_begin\s+[0-7]{3,4}\s+\S?.{0,45}?\S+\.${ci_txht}\n+(?:^[\s|>]*M[$uu_chars]{60,61}\s*\n){5,}?/mo)
                && (return "UUencoded HTML/text spam");

            if ($block_mime_html) {
                (($hdr{"Content-Disposition"} =~ /filename.*\.html?/)
                 || ($hdr{"Content-Base"} =~ /file:.*\.html?/)
                 || ((lc($hdr{"Content-Type"}) =~ /multipart\/mixed/)
                     && ($hdr{"__BODY__"} =~ /^$ci_ctype:\s+text\/html/mo)
                     && ($hdr{"__BODY__"} =~ /^$ci_cte:\s[Bb][Aa][Ss][Ee]64/mo)))
                    && (return "Misc HTML spam");
            }

            ($organization =~ /email\s+platinum/)
                && (return $verbose ? "Email Platinum" : "Bot signature");

            # The remaining content checks apply only to thread-starting posts.
            if (! $hdr{"References"}) {
                ($hdr{"Organization"} =~ /^[A-Z]{2,3}\sInc\.\s*$/)
                    && ($hdr{"Message-ID"} =~ /<\d{12}\.\d{10}\@/)
                    && (return $verbose ? "Adultsights bot" : "Bot signature");

                ($hdr{"Message-ID"} =~ /msgidabcxyz\.com/)
                    && (return $verbose ? "Bot - msgidabcxyz" : "Bot signature");

                ($hdr{"__BODY__"} =~ /\.*[\r<=>]+\r[\r<=>]+$/m)
                    && (return $verbose ? "Angle-bracket bot" : "Bot signature");

                if (($hdr{"Newsgroups"} !~ /news\.lists\.filters|alt\.nocem\.misc/)
                    && ($hdr{"From"} !~ /nl-cancel\@.*xs4all\.nl|lendl\@.*sbg\.ac\.at|lendl\@eddie\.ping\.at/)) {
                    # Only the first 50000 characters of the body are scanned.
                    my ($body) = lc(substr($hdr{"__BODY__"},0,50000));

                    ($body =~ /http:..((?:www\.)?(?:$badguys)\.(?:$tlds)|$badips)/o)
                        && (return $verbose ? "Spam - $1" : "Spam domain");

                    (lc($hdr{"Subject"}) =~ /($services)/o)
                        && (return "Services spam" . ($verbose ? " - $1" : ""));
                }
            } # references

            if ($hdr{"Newsgroups"} !~ /news\.lists\.filters|alt\.nocem\.misc/) {
                $empreturn = "";

                # Store From, Subject, and Lines in history array and hash
                push @history, $hash1;
                $history{$hash1}++;

                # Store NNTP-Posting-Host and Lines
                if ($hash2) {
                    push @history, $hash2;
                    $history{$hash2}++;
                }

                if ($hash3) {
                    # Effective group count: the smaller of the Newsgroups
                    # and Followup-To counts.  Followup-To is consulted only
                    # when present; a leftover value from an earlier article
                    # must never leak into this one.
                    my $ngc = ($hdr{'Newsgroups'} =~ tr/,//) + 1;
                    if ($hdr{'Followup-To'}) {
                        my $ftc = ($hdr{'Followup-To'} =~ tr/,//) + 1;
                        $ngc = $ftc if ($ftc < $ngc);
                    }

                    # Accumulate this copy's SBI contribution.
                    $bistash{$hash3} += (sqrt($ngc)+$ngc)/2;
                    push (@bihist, $hash3);
                    $bihist{$hash3}++;
                }

                # If post appears more than high limit, save for
                # continual rejection, outside of history window.
                # For body hashes the limit is on the accumulated SBI
                # (see $md5bi), not on the raw number of copies.
                if ($hash3 && $bistash{$hash3} >= $md5bi) {
                    savehist ($hash3);
                    delete $bihist{$hash3};
                    delete $bistash{$hash3};
                    @bihist = grep (($_ ne $hash3), @bihist);
                    $empreturn = "New EMP detected (md5)";
                } elsif ($history{$hash1} > $maxmultiposts) {
                    savehist ($hash1);
                    $empreturn = "New EMP detected (f/s/l)";
                } elsif (($hash2) and ($history{$hash2} > $maxmultiposts)) {
                    savehist ($hash2);
                    $empreturn = "New EMP detected (ph/l)";
                }

                # Trim old entries from history file.  Remember that
                # data structure stays around, even between filter.perl
                # reloads.
                while ($#history > $ArticleHistSize) {
                    my $expired = shift @history;
                    next unless (exists $history{$expired});
                    if ($history{$expired} < 2) {
                        delete $history{$expired};
                    } else {
                        $history{$expired}--;
                    }
                }
                while ($#bihist > $BIHistSize) {
                    my $expired = shift @bihist;
                    next unless (exists $bihist{$expired});
                    if ($bihist{$expired} < 2) {
                        # Last remembered copy is gone: forget its SBI too.
                        delete $bihist{$expired};
                        delete $bistash{$expired};
                    } else {
                        $bihist{$expired}--;
                    }
                }

                return $empreturn;
            } # news.lists.filters/alt.nocem.misc
        } # news.admin.net-abuse.*
    } elsif ($hdr{"Control"} =~ /cancel/) {
        (($hdr{"X-Cancelbot"}) || ($hdr{"__BODY__"} =~ /Usenet.*Cancel.*Engine/ms))
            && (return "Rogue cancel" . ($verbose ? " (UCE)" : ""));

        ($hdr{"Newsgroups"} =~ /control\.cancel/)
            && (return "Rogue cancel" . ($verbose ? " (newsgroups)" : ""));

        ($hdr{"Path"} =~ /(winternet\.com|ftp\.uu\.net)/)
            && (return $verbose ? "Cancel with $1 in path" : "Rogue cancel");

        ($hdr{"Organization"} =~ /Crislewis/)
            && (return "Rogue cancel" . ($verbose ? " (Organization)" : ""));

        # Path entries left behind by known rogue cancel bots.
        ($hdr{"Path"} =~ /((?:hacker|crack|porn|cripple|gimp|cunt|hole|fag|aids|faq|god|hindu|dothead|jew|kike|moslem|towelhead|nazi|kraut|nerd|geek|nigger|redneck|rice|slanteye|spick|whine)cancel|cyberwhin(?:er|ing))/)
            && (return "Rogue cancel" . ($verbose ? " - $1" : ""));
    } elsif ($hdr{"Control"} =~ /(?:new|rm)group\s(?:comp|misc|news|rec|soc|sci|humanities|talk)/) {
        ($hdr{"From"} !~ /group-admin\@isc\.org/)
            && (return $verbose ? "Big 8 control message from wrong address" : "Bad control message");
    } elsif ($hdr{"Control"} =~ /(?:new|rm)group\s/) {
        ($hdr{"__BODY__"} =~ /Control message generated by Netscape Collabra Server/)
            && (return $verbose ? "Bogus control message from Collabra luser" : "Bad control message");

        (($hdr{"From"} =~ /sexzilla\.com|netzilla\.net/)
         || ($hdr{"Control"} =~ /newgroup.*(?:sexzilla|netzilla)/))
            && (return $verbose ? "Sexzilla newgroup message" : "Bad control message");
    }

    return "";
}

# savehist -- remember $key as a confirmed EMP.
#
# Adds $key to %EMP/@EMP_Hist after expiring the oldest entries beyond
# $EMPHistSize, and removes $key from the short-term @history/%history
# tables.
sub savehist {
    my ($key) = @_;

    while ($#EMP_Hist > $EMPHistSize) {
        my $expired = shift @EMP_Hist;
        delete $EMP{$expired} if (exists $EMP{$expired});
    }
    $EMP{$key} = 1;
    push @EMP_Hist, $key;

    delete $history{$key};
    @history = grep (!($_ eq $key), @history);
}

# filter_mode -- intentionally does nothing; returns an empty value.
sub filter_mode {
    return;
}

$::MaxArtSize = 150000;   # If the article is at least this big, our
                          # behavior is the same no matter how much
                          # bigger it is. It is safe to truncate the
                          # article body at this size.

# filter_bodyhash -- return "<md5 hex of body> <body length>" for the
# current article, or undef when the body is $::MaxArtSize or longer.
#
# The result is cached in $cache_hash, keyed on the Message-ID held in
# $cache_msgid, so repeated calls for the same article do not rehash.
sub filter_bodyhash {
    return $cache_hash if ($hdr{'Message-ID'} eq $cache_msgid);

    my $body_len = length($hdr{'__BODY__'});
    undef $cache_hash;
    $cache_hash = MD5->hexhash($hdr{'__BODY__'}) . " $body_len"
        if ($body_len < $::MaxArtSize);
    $cache_msgid = $hdr{'Message-ID'};
    return $cache_hash;
}