*** ../cf/filter_innd.pl	Sat Nov 22 23:43:16 1997
--- filter_innd.pl	Sun Nov 23 18:11:55 1997
***************
*** 5,18 ****
  # by Jeremy Nixon
  # originally based upon some code from Jeff Garzik
  #
  # Install this file as filter_innd.pl, wherever your server expects
  # to find that file (possibly /usr/news/bin/control).
  #
  
  $maxgroups = 14; # maximum number of groups in a crosspost
  $maxfollowups = 7; # followups less than this is ok
  $maxmultiposts = 20; # start rejecting after this many copies
! $ArticleHistSize = 8500; # keep history of last N message ids
  $EMPHistSize = 1000; # number of EMP ids to hold in memory
  $tjfmaxgroups = 6; # max xposts in test, forsale, jobs groups
  $verbose = 1; # stick lots of cool stuff into news.notice?
--- 5,41 ----
  # by Jeremy Nixon
  # originally based upon some code from Jeff Garzik
  #
+ # Modified by Chris Siebenmann for operation
+ # with Narf (http://utcc.utoronto.ca/abuse/antispam-stuff/narf/).
+ # No longer works with INN.
+ #
  # Install this file as filter_innd.pl, wherever your server expects
  # to find that file (possibly /usr/news/bin/control).
  #
+ # What version of narf we require
+ $MINVER = 0.93;
+ # Ensure that we have the necessary minimum version of Narf.
+ # Annoyingly we cannot just use die by itself, because the die's output
+ # will be swallowed by 'do'.
+ do {
+     print STDERR "filter_innd.pl requires Narf version $MINVER or better: only have $::NARFVERSION.\n";
+     die "narf version error";
+ } unless ($::NARFVERSION >= $MINVER);
+ 
+ use MD5;
+ $md5bi = 20;            # The deadly SBI for MD5 hashes. Note that
+                         # for MD5 matches we compute the true SBI,
+                         # not just count posts seen.
+                         # BI 20 is standard, although SBI is not.
+ $BIHistSize = 10000;    # Keep the SBI history for the last this many
+                         # articles around (give or take some margins)
+ 
+ 
  
  $maxgroups = 14; # maximum number of groups in a crosspost
  $maxfollowups = 7; # followups less than this is ok
  $maxmultiposts = 20; # start rejecting after this many copies
! $ArticleHistSize = 4000; # keep history of last N message ids
  $EMPHistSize = 1000; # number of EMP ids to hold in memory
  $tjfmaxgroups = 6; # max xposts in test, forsale, jobs groups
  $verbose = 1; # stick lots of cool stuff into news.notice?
***************
*** 81,87 ****
--- 104,113 ----
        ($hdr{"Newsgroups"} =~ /$bin_allowed/o && $hdr{'Lines'} > 999)) {
      $hash2 = "$hdr{'NNTP-Posting-Host'} $hdr{'Lines'}" if ($hdr{'NNTP-Posting-Host'});
    }
+   undef $hash3;
+   $hash3 = &filter_bodyhash();   # undef when the body is >= $::MaxArtSize
  
+   return "EMP rejected" . ($verbose ? " (md5)" : "") if ($hash3 && $EMP{$hash3});
    return "EMP rejected" . ($verbose ? " (f/s/l)" : "") if ($EMP{$hash1});
    return "EMP rejected" . ($verbose ? " (ph/l)" : "") if ($EMP{$hash2});
  
***************
*** 185,194 ****
        push @history, $hash2;
        $history{$hash2}++;
      }
  
      # If post appears more than high limit, save for
      # continual rejection, outside of history window
!     if ($history{$hash1} > $maxmultiposts) {
        savehist ($hash1);
        $empreturn = "New EMP detected (f/s/l)";
      } elsif (($hash2) and ($history{$hash2} > $maxmultiposts)) {
--- 211,233 ----
        push @history, $hash2;
        $history{$hash2}++;
      }
+     if ($hash3) {
+       $ngc = ($hdr{'Newsgroups'} =~ tr/,//)+1;   # groups = commas + 1
+       $ftc = $hdr{'Followup-To'} ? ($hdr{'Followup-To'} =~ tr/,//)+1 : $ngc;   # set every time: $ftc is global
+       $ngc = $ftc if ($ftc < $ngc);   # use the smaller of the two counts
+       $bistash{$hash3} += (sqrt($ngc)+$ngc)/2;   # running SBI for this body
+       push (@bihist, $hash3);
+       $bihist{$hash3}++;   # number of @bihist entries holding this hash
+     }
  
      # If post appears more than high limit, save for
      # continual rejection, outside of history window
!     if ($hash3 && $bistash{$hash3} >= $md5bi) {   # compare the SBI, not the copy count
!       savehist ($hash3);
!       delete $bihist{$hash3}; delete $bistash{$hash3};
!       @bihist = grep (($_ ne $hash3), @bihist);
!       $empreturn = "New EMP detected (md5)";
!     } elsif ($history{$hash1} > $maxmultiposts) {
        savehist ($hash1);
        $empreturn = "New EMP detected (f/s/l)";
      } elsif (($hash2) and ($history{$hash2} > $maxmultiposts)) {
***************
*** 209,214 ****
--- 248,262 ----
          $history{$tmp_hist}--;
        }
      }
+     while ($#bihist > $BIHistSize) {
+       $tmp_hist = shift @bihist;
+       next unless (exists $bihist{$tmp_hist});
+       if ($bihist{$tmp_hist} < 2) {
+         delete $bihist{$tmp_hist}; delete $bistash{$tmp_hist};
+       } else {
+         $bihist{$tmp_hist}--;   # NOTE: $bistash keeps the expired entry's share
+       }
+     }
      return $empreturn;
    } # news.lists.filters/alt.nocem.misc
  } # news.admin.net-abuse.*
***************
*** 263,266 ****
--- 311,328 ----
  
  sub filter_mode {
    return;
+ }
+ 
+ $::MaxArtSize = 150000; # If the article is at least this big, our
+                         # behavior is the same no matter how much
+                         # bigger it is. It is safe to truncate the
+                         # article body at this size.
+ sub filter_bodyhash {
+   return $cache_hash if ($hdr{'Message-ID'} eq $cache_msgid);   # one hash per article
+   my $l = length($hdr{'__BODY__'});
+   undef $cache_hash;
+   $cache_hash = MD5->hexhash($hdr{'__BODY__'}) . " $l"
+     if ($l < $::MaxArtSize);
+   $cache_msgid = $hdr{'Message-ID'};
+   return $cache_hash;
  }