#!/usr/bin/perl # # handlespam - Automate the processing of spam # # By: Theo Van Dinter (felicity@kluge.net) (c) 1998-2002 # Revision Info: $Id: handlespam 210 2005-09-27 00:29:31Z felicity $ # # I found that I was spending too much time handling spam, so I automated # what I could to help save time. # # This script will do each of the following for each input message # (mbox format): # # ** strip out my X-Reject-style headers for all processing except archiving # ** report the message ala 'spamassassin -r' to Razor, DCC, Pyzor, and # (if available) the Bayes classifier # ** if the message was relayed through a third-party (there are more than 1 # "Received:" headers,) do an open-relay check of that server. if the # server is an open-relay, report them to various open relay databases. # Need my "testrelay" script for this, so off by default. # ** if the sending server doesn't have a proper lookup, block their class C # network. this is a little extreme, I know, but the majority of spam is # either relayed through someone who has no clue, or is directly from # someone without a clue. any decently managed network will have proper # DNS setup for their hosts. # ** report the message to spamcop # ** report the message to the FTC # ** move the message to a spam archive for later referencing # ** if the message wasn't caught by spamassassin (SA), bounce to the # spamassassin-sightings mailing list. (No "X-Spam-Status: Yes" header) # This list is actually defunct now, so the feature is off by default. # ** if the message is a bounce from majordomo (for "X-Spam-Flag: YES"), # then unbounce the message before processing. # # # Incoming mail is scanned via SpamAssassin. Mail that is determined # to be spam is saved into "spam-work". I then periodically go through # and take all actual spam and move it into a folder I call "hs". A cron # job then runs this script over "hs" to handle the reporting process. # Output from handlespam is sent to me via cron, so I can easily see what # was handled, and I can easily cut/paste into my sendmail accessdb. # # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # $|++; use strict; use Mail::Internet; use Mail::SpamAssassin; use Getopt::Long; # Needs at least perl 5.6.1 and SA 3.0.0 ... require v5.6.1; die "Need SpamAssassin 3.0.0 or higher!\n" unless ($Mail::SpamAssassin::VERSION >= 3.000000); # X-$x_header.* is how we markup things about the message # -Reject is flags for spam my $x_header = 'TVD'; my $fromAddr = $ENV{'LOGNAME'}; # what address for reporting # open-relay address => value. # value: 0 = Send "relay: IP" per message with no body # 1 = Send "relay: IP" per message with body # 2 = Send "relay: IP" as a batch after processing with no bodies # my %relays = ( 'relays@mail-abuse.org' => 1, 'relays@ordb.org' => 2, 'relays@relays.osirusoft.com' => 2, ); # Addresses we should send copies of the spam to. # http://www.ftc.gov/bcp/conline/pubs/online/inbox.htm # What Can I Do With the Spam in my In-Box? # Report it to the Federal Trade Commission. Send a copy of unwanted or # deceptive messages to spam@uce.gov. The FTC uses the unsolicited emails # stored in this database to pursue law enforcement actions against people # who send deceptive spam email. my @sendcopy = ('spam@uce.gov'); # Skip relay/block checks for these nets/relays my %skiprelay = ( '65.214.43' => 'techtarget.com', ); my $sightings = '' ; # 'spamassassin-sightings@lists.sf.net'; # where to send sightings, now defunct my $archive = $ENV{'HOME'} . "/storage/mail/spammers"; # where to save spam my $spamcop = ''; # spamcop addr my $testrelay = ''; # location of testrelay my $report = 1; # Set to 0 if you don't want to report ala 'spamassassin -r' my $debug = 0; # don't actually do anything external ... my $maxSpamcopSize = undef; # Maximum message size to send to SpamCop my $habeas_report = 'reports@habeas.com'; # Who should we report HABEAS_SWE forges to? my $bondedsender_report = 'abuse@bondedsender.org'; # Who should we report RCVD_IN_BSP_* to? # Only need to do this up once... :) my $spamass = Mail::SpamAssassin->new( { 'debug' => $debug, } ); $spamass->init(1); # read the config and setup paths and the like # Prepare Bayes $spamass->init_learner({ force_expire => 0, wait_for_lock => 0, caller_will_untie => 1, learn_to_journal => 1, }); use vars qw/ @fullrelaylist /; # # Handle the spam! # This routine takes a message array and plays with it to check for the # appropriate bits. It will then report the spam to places as necessary. # sub HandleSpam($$@) { my ( $blocks, $relayChecked, @msg ) = @_; # Look for a majordomo bounced message, and "unbounce" as necessary. my $i = 0; until ( $msg[$i] =~ /^\r?$/ ) { if ( $msg[$i] =~ m@^Subject: BOUNCE .* taboo header: /\^X-Spam-Flag: YES/i@ ) { splice @msg, 0, $i; # get rid of everything up to the Subject splice @msg, 0, 1 until ( $msg[0] =~ /^>?From / ); $msg[0] =~ s/^>//; last; } $i++; } # raw_message is used for archiving my $raw_message = join ( "", @msg ); # Make a copy of the incoming message without X-x_header.* headers. # Watch out for "!/foo/ || !/bar/" as it will match everything ... my $flag = 0; my @msgcopy = grep( $flag || ( /^\r?$/ && ++$flag ) || !/^X-(?:${x_header}-[^:]+|Reject):/o, @msg ); my $mail = $spamass->parse(\@msgcopy); # SA Status and X-TVD headers are in the main headers all the time my $SAstatus = $mail->get_header("X-Spam-Status") || "No, tests=\n"; $flag = 1; my @reject = grep( ( $flag && (/^X-${x_header}-Reject:/o || /^X-Reject:/) ) || ( /^\r?$/ && --$flag > 0 ), @msg ); # Remove the markup for processing and reporting... my $cleanmsg = $spamass->remove_spamassassin_markup($mail); my @msg2 = @msg = split( /^/m, $cleanmsg ); my $cleanmail = $spamass->parse(\@msg2); my $msg = new Mail::Internet( \@msg, Modify => 0 ); my $head = $msg->head(); my @received = $head->get("Received"); my $from = $head->get("From") || "Unknown"; my $subject = $head->get("Subject") || "Unknown"; # Figure out if the message is considered spam or not $SAstatus =~ s/\n\t+\s+//g; # unfold large sections $SAstatus =~ s/\n\t+/ /g; # unfold everything else 1 while ( $SAstatus =~ s/\b(tests=\S+,)\s+/$1/ ); my ( $status, $tests ) = ( $SAstatus =~ /^(.+?)\s+tests=(\S+)/ ); foreach ( $tests, $from, $subject ) { tr/\040-\176//cd; # remove non-printables, newline, tab, etc ... s/\s+/ /g; # compress multiple whitespace into a single space } my $spamtrap = 0; # Spamtraps don't get reported to spamcop or anyone else my $autoreport = 0; # autoreporting disables almost everything, all net tests foreach (@reject) { if (/spamtrap/) { $spamtrap = 1; last; } elsif (/autoreport/) { /autoreport\s+(.+?)\s*$/; $autoreport = $1 || "Yes"; last; } } # Spit out a message identifier print "Mail From: $from\n"; print "Subject : $subject\n"; print "SA Status: $status\n"; print "Spamtrap?: ", ( $spamtrap ? "Yes" : "No" ), "\n"; print "Autorept?: ", ( $autoreport ? $autoreport : "No" ), "\n"; if ($debug) { print "(debug) SA Tests: $tests\n"; } # If it was sent to us from a non-reversable host, block the class C. $received[0] =~ /\(([^)]+)\)/; my $relay = $1; $relay =~ /\[([0-9.]+)\]/; my $relayIP = $1; if ( $relay =~ /^(?:unknown\s*)?\[[0-9.]+\]$/ || $relay =~ /may be forged/ ) { $relayIP =~ /^([0-9.]+)\.\d+/; my $net = $1; if ( $net eq "127.0.0" ) { print "(block) Class C is $net?!? Ignoring.\n"; } elsif ( $net =~ /^(10|192\.168|172\.(1[6-9]|2[0-9]|3[01]))\./ ) { print "(block) Origin in RFC1918 network ($net). Ignoring.\n"; } elsif ( exists $skiprelay{$net} ) { print "(block) Relay is $skiprelay{$net}, skipping.\n"; } elsif ( exists $skiprelay{$relay} ) { print "(block) Relay is $skiprelay{$relay}, skipping.\n"; } else { ${$blocks}{$net} = "REJECT"; print "(block) Adding $relayIP ($net) to the accessdb block list\n"; } } # As I said, autoreporting disables most things. # Pretty much, just archive it. unless ($autoreport) { # Should we report this spam to Razor? if ($report) { if ($debug) { print "(razor) Reporting message to Razor et al (debug mode disables)\n"; } else { # SA 3.0 had 0 mean ok, 3.1+ has 1 mean ok (more correct) my $ok_return = $Mail::SpamAssassin::VERSION >= 3.001000; if ( $spamass->report_as_spam($mail, { dont_report_to_spamcop => 1 }) != $ok_return ) { print "(razor) Could not report spam to Razor et al!\n"; } else { print "(razor) Submitted message to Razor et al\n"; } } } # If the message was relayed, check the sending host for open-relayness. if ( %relays && keys %relays && !$debug && @received > 1 && -x $testrelay && $relayIP !~ /^127\.0\.0\./ ) { if ( ${$relayChecked}{$relayIP}++ ) { print "(relay) Already did a relay check for $relayIP, skipping\n"; } else { $received[1] =~ /\(([^)]+)\)/; my $origin = $1; $origin =~ /\[([0-9.]+)\]/; my $originIP = $1; $originIP =~ /^([0-9.]+)\.\d+$/; my $oC = $1; $relayIP =~ /^([0-9.]+)\.\d+$/; my $rC = $1; if ( $oC eq $rC ) { print "(relay) Relay and Origin in same class C network ($oC), skipping\n"; } elsif ( $oC =~ /^(10|192\.168|172\.(1[6-9]|2[0-9]|3[01]))\./ ) { print "(relay) Origin in RFC1918 network ($oC), skipping\n"; } elsif ( $oC =~ /^127\./ ) { print "(relay) Origin is Relay ($origin), skipping\n"; } elsif ( exists $skiprelay{$rC} ) { print "(relay) Relay is $skiprelay{$rC}, skipping.\n"; } elsif ( exists $skiprelay{$relayIP} ) { print "(relay) Relay is $skiprelay{$relayIP}, skipping.\n"; } else { open( TR, "$testrelay -d 0 $relayIP|" ) || die "Can't run testrelay!:$!"; my $relayTest =