#!/usr/bin/perl # # handlespam - Automate the processing of spam # # By: Theo Van Dinter (felicity@kluge.net) (c) 1998-2002 # Revision Info: $Id: handlespam 210 2005-09-27 00:29:31Z felicity $ # # I found that I was spending too much time handling spam, so I automated # what I could to help save time. # # This script will do each of the following for each input message # (mbox format): # # ** strip out my X-Reject-style headers for all processing except archiving # ** report the message ala 'spamassassin -r' to Razor, DCC, Pyzor, and # (if available) the Bayes classifier # ** if the message was relayed through a third-party (there are more than 1 # "Received:" headers,) do an open-relay check of that server. if the # server is an open-relay, report them to various open relay databases. # Need my "testrelay" script for this, so off by default. # ** if the sending server doesn't have a proper lookup, block their class C # network. this is a little extreme, I know, but the majority of spam is # either relayed through someone who has no clue, or is directly from # someone without a clue. any decently managed network will have proper # DNS setup for their hosts. # ** report the message to spamcop # ** report the message to the FTC # ** move the message to a spam archive for later referencing # ** if the message wasn't caught by spamassassin (SA), bounce to the # spamassassin-sightings mailing list. (No "X-Spam-Status: Yes" header) # This list is actually defunct now, so the feature is off by default. # ** if the message is a bounce from majordomo (for "X-Spam-Flag: YES"), # then unbounce the message before processing. # # # Incoming mail is scanned via SpamAssassin. Mail that is determined # to be spam is saved into "spam-work". I then periodically go through # and take all actual spam and move it into a folder I call "hs". A cron # job then runs this script over "hs" to handle the reporting process. # Output from handlespam is sent to me via cron, so I can easily see what # was handled, and I can easily cut/paste into my sendmail accessdb. # # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # $|++; use strict; use Mail::Internet; use Mail::SpamAssassin; use Getopt::Long; # Needs at least perl 5.6.1 and SA 3.0.0 ... require v5.6.1; die "Need SpamAssassin 3.0.0 or higher!\n" unless ($Mail::SpamAssassin::VERSION >= 3.000000); # X-$x_header.* is how we markup things about the message # -Reject is flags for spam my $x_header = 'TVD'; my $fromAddr = $ENV{'LOGNAME'}; # what address for reporting # open-relay address => value. # value: 0 = Send "relay: IP" per message with no body # 1 = Send "relay: IP" per message with body # 2 = Send "relay: IP" as a batch after processing with no bodies # my %relays = ( 'relays@mail-abuse.org' => 1, 'relays@ordb.org' => 2, 'relays@relays.osirusoft.com' => 2, ); # Addresses we should send copies of the spam to. # http://www.ftc.gov/bcp/conline/pubs/online/inbox.htm # What Can I Do With the Spam in my In-Box? # Report it to the Federal Trade Commission. Send a copy of unwanted or # deceptive messages to spam@uce.gov. The FTC uses the unsolicited emails # stored in this database to pursue law enforcement actions against people # who send deceptive spam email. my @sendcopy = ('spam@uce.gov'); # Skip relay/block checks for these nets/relays my %skiprelay = ( '65.214.43' => 'techtarget.com', ); my $sightings = '' ; # 'spamassassin-sightings@lists.sf.net'; # where to send sightings, now defunct my $archive = $ENV{'HOME'} . "/storage/mail/spammers"; # where to save spam my $spamcop = ''; # spamcop addr my $testrelay = ''; # location of testrelay my $report = 1; # Set to 0 if you don't want to report ala 'spamassassin -r' my $debug = 0; # don't actually do anything external ... my $maxSpamcopSize = undef; # Maximum message size to send to SpamCop my $habeas_report = 'reports@habeas.com'; # Who should we report HABEAS_SWE forges to? my $bondedsender_report = 'abuse@bondedsender.org'; # Who should we report RCVD_IN_BSP_* to? # Only need to do this up once... :) my $spamass = Mail::SpamAssassin->new( { 'debug' => $debug, } ); $spamass->init(1); # read the config and setup paths and the like # Prepare Bayes $spamass->init_learner({ force_expire => 0, wait_for_lock => 0, caller_will_untie => 1, learn_to_journal => 1, }); use vars qw/ @fullrelaylist /; # # Handle the spam! # This routine takes a message array and plays with it to check for the # appropriate bits. It will then report the spam to places as necessary. # sub HandleSpam($$@) { my ( $blocks, $relayChecked, @msg ) = @_; # Look for a majordomo bounced message, and "unbounce" as necessary. my $i = 0; until ( $msg[$i] =~ /^\r?$/ ) { if ( $msg[$i] =~ m@^Subject: BOUNCE .* taboo header: /\^X-Spam-Flag: YES/i@ ) { splice @msg, 0, $i; # get rid of everything up to the Subject splice @msg, 0, 1 until ( $msg[0] =~ /^>?From / ); $msg[0] =~ s/^>//; last; } $i++; } # raw_message is used for archiving my $raw_message = join ( "", @msg ); # Make a copy of the incoming message without X-x_header.* headers. # Watch out for "!/foo/ || !/bar/" as it will match everything ... my $flag = 0; my @msgcopy = grep( $flag || ( /^\r?$/ && ++$flag ) || !/^X-(?:${x_header}-[^:]+|Reject):/o, @msg ); my $mail = $spamass->parse(\@msgcopy); # SA Status and X-TVD headers are in the main headers all the time my $SAstatus = $mail->get_header("X-Spam-Status") || "No, tests=\n"; $flag = 1; my @reject = grep( ( $flag && (/^X-${x_header}-Reject:/o || /^X-Reject:/) ) || ( /^\r?$/ && --$flag > 0 ), @msg ); # Remove the markup for processing and reporting... my $cleanmsg = $spamass->remove_spamassassin_markup($mail); my @msg2 = @msg = split( /^/m, $cleanmsg ); my $cleanmail = $spamass->parse(\@msg2); my $msg = new Mail::Internet( \@msg, Modify => 0 ); my $head = $msg->head(); my @received = $head->get("Received"); my $from = $head->get("From") || "Unknown"; my $subject = $head->get("Subject") || "Unknown"; # Figure out if the message is considered spam or not $SAstatus =~ s/\n\t+\s+//g; # unfold large sections $SAstatus =~ s/\n\t+/ /g; # unfold everything else 1 while ( $SAstatus =~ s/\b(tests=\S+,)\s+/$1/ ); my ( $status, $tests ) = ( $SAstatus =~ /^(.+?)\s+tests=(\S+)/ ); foreach ( $tests, $from, $subject ) { tr/\040-\176//cd; # remove non-printables, newline, tab, etc ... s/\s+/ /g; # compress multiple whitespace into a single space } my $spamtrap = 0; # Spamtraps don't get reported to spamcop or anyone else my $autoreport = 0; # autoreporting disables almost everything, all net tests foreach (@reject) { if (/spamtrap/) { $spamtrap = 1; last; } elsif (/autoreport/) { /autoreport\s+(.+?)\s*$/; $autoreport = $1 || "Yes"; last; } } # Spit out a message identifier print "Mail From: $from\n"; print "Subject : $subject\n"; print "SA Status: $status\n"; print "Spamtrap?: ", ( $spamtrap ? "Yes" : "No" ), "\n"; print "Autorept?: ", ( $autoreport ? $autoreport : "No" ), "\n"; if ($debug) { print "(debug) SA Tests: $tests\n"; } # If it was sent to us from a non-reversable host, block the class C. $received[0] =~ /\(([^)]+)\)/; my $relay = $1; $relay =~ /\[([0-9.]+)\]/; my $relayIP = $1; if ( $relay =~ /^(?:unknown\s*)?\[[0-9.]+\]$/ || $relay =~ /may be forged/ ) { $relayIP =~ /^([0-9.]+)\.\d+/; my $net = $1; if ( $net eq "127.0.0" ) { print "(block) Class C is $net?!? Ignoring.\n"; } elsif ( $net =~ /^(10|192\.168|172\.(1[6-9]|2[0-9]|3[01]))\./ ) { print "(block) Origin in RFC1918 network ($net). Ignoring.\n"; } elsif ( exists $skiprelay{$net} ) { print "(block) Relay is $skiprelay{$net}, skipping.\n"; } elsif ( exists $skiprelay{$relay} ) { print "(block) Relay is $skiprelay{$relay}, skipping.\n"; } else { ${$blocks}{$net} = "REJECT"; print "(block) Adding $relayIP ($net) to the accessdb block list\n"; } } # As I said, autoreporting disables most things. # Pretty much, just archive it. unless ($autoreport) { # Should we report this spam to Razor? if ($report) { if ($debug) { print "(razor) Reporting message to Razor et al (debug mode disables)\n"; } else { # SA 3.0 had 0 mean ok, 3.1+ has 1 mean ok (more correct) my $ok_return = $Mail::SpamAssassin::VERSION >= 3.001000; if ( $spamass->report_as_spam($mail, { dont_report_to_spamcop => 1 }) != $ok_return ) { print "(razor) Could not report spam to Razor et al!\n"; } else { print "(razor) Submitted message to Razor et al\n"; } } } # If the message was relayed, check the sending host for open-relayness. if ( %relays && keys %relays && !$debug && @received > 1 && -x $testrelay && $relayIP !~ /^127\.0\.0\./ ) { if ( ${$relayChecked}{$relayIP}++ ) { print "(relay) Already did a relay check for $relayIP, skipping\n"; } else { $received[1] =~ /\(([^)]+)\)/; my $origin = $1; $origin =~ /\[([0-9.]+)\]/; my $originIP = $1; $originIP =~ /^([0-9.]+)\.\d+$/; my $oC = $1; $relayIP =~ /^([0-9.]+)\.\d+$/; my $rC = $1; if ( $oC eq $rC ) { print "(relay) Relay and Origin in same class C network ($oC), skipping\n"; } elsif ( $oC =~ /^(10|192\.168|172\.(1[6-9]|2[0-9]|3[01]))\./ ) { print "(relay) Origin in RFC1918 network ($oC), skipping\n"; } elsif ( $oC =~ /^127\./ ) { print "(relay) Origin is Relay ($origin), skipping\n"; } elsif ( exists $skiprelay{$rC} ) { print "(relay) Relay is $skiprelay{$rC}, skipping.\n"; } elsif ( exists $skiprelay{$relayIP} ) { print "(relay) Relay is $skiprelay{$relayIP}, skipping.\n"; } else { open( TR, "$testrelay -d 0 $relayIP|" ) || die "Can't run testrelay!:$!"; my $relayTest = ; close(TR) or die $! ? "Error closing testrelay pipe: $!" : "Exit status $? from testrelay"; # The server is an open relay, so report to MAPS. if ( $relayTest =~ /appears to be an open relay/ ) { print "(relay) Relay check returned positive for $relayIP\n"; push ( @fullrelaylist, $relayIP ); foreach my $temprelay ( keys %relays ) { next if ( $relays{$temprelay} == 2 ); # at the end ... my @head = ( "From: $fromAddr", "To: $temprelay", "Subject: Re: " . $msg->get('Subject'), "X-Mailer: spamhandler (" . '$Id: handlespam 210 2005-09-27 00:29:31Z felicity $' . ")", ); my @body = ("Relay: $relayIP\n"); push ( @body, "\n", $cleanmsg ) if ( $relays{$temprelay} ); my $nmsg = new Mail::Internet( Header => new Mail::Header( \@head ), Body => \@body ); SendMail( $fromAddr, $temprelay, $nmsg->as_string() ); print "(relay) Open Relay mail sent to $temprelay\n"; } } else { print "(relay) Relay check returned negative for $relayIP\n"; } } } } elsif ($debug) { print "(debug) Skipping open-relay check for $relayIP\n"; } # Was this message considered spam by SA? # If not, bounce it to the spamassassin-sightings list. if ( $status eq "no" && $sightings ) { print "(sight) Sending to SA sightings"; if ($debug) { print " (debug mode disables)"; } else { SendSpam( $fromAddr, $msg, \$cleanmsg, $sightings ); } print "\n"; } # Did this spam forge Habeas, but isn't already in HIL? If so, send it to Habeas! if ( $tests =~ /\bHABEAS_/ && $tests !~ /\bHABEAS_VIOLATOR\b/ && $habeas_report ) { print "(habeas) Spam hit listed by HABEAS, reporting"; if ($debug) { print " (debug mode disables)"; } else { SendSpam( $fromAddr, $msg, \$cleanmsg, $habeas_report ); } print "\n"; } # Did this spam have Bonded Sender approval? If so, send it to Ironport! if ( $tests =~ /\b(RCVD_IN_BSP_[A-Z]+)/ && $bondedsender_report ) { print "(bsp) Spam hit $1, reporting"; if ($debug) { print " (debug mode disables)"; } else { SendSpam( $fromAddr, $msg, \$cleanmsg, $bondedsender_report ); } print "\n"; } # If the message isn't to a spamtrap and we don't learn during reports, # or the message was incorrectly learned as ham, teach Bayes! my $autolearn = $head->get("X-Spam-Autolearn") || "no"; if ( (!$spamtrap && !$spamass->{conf}->{bayes_learn_during_report}) || $autolearn eq "ham" ) { print "(bayes) Learning as spam ...\n"; $spamass->learn ($cleanmail, undef, 1, 0); } # If it's a spamtrap, don't bother sending copies to folks. unless ($spamtrap) { # Report message to spamcop if ($spamcop) { # Try to figure out when the message was received. If we can't, # let SpamCop do it... :) my $time = Mail::SpamAssassin::Util::parse_rfc822_date($received[0]); if ( defined $maxSpamcopSize && length $cleanmsg > $maxSpamcopSize ) { print "(spamc) Skipping SpamCop, message too large.\n"; } elsif ( defined $time && (time - $time >= 2*86400) ) { print "(spamc) Skipping SpamCop, message too old.\n"; } elsif ( $cleanmsg !~ /\r?\n\r?\n.*\w/ ) { print "(spamc) Skipping SpamCop, message is considered to have no body.\n"; } else { print "(spamc) Sending to SpamCop"; if ($debug) { print " (debug mode disables)"; } else { SendSpam( $fromAddr, $msg, \$cleanmsg, $spamcop ); } print "\n"; } } # Send a copy of the spam to anyone who wants it if (@sendcopy) { print "(scopy) Sending spam copies"; if ($debug) { print " (debug mode disables)"; } else { SendSpam( $fromAddr, $msg, \$cleanmsg, @sendcopy ); } print "\n"; } } } # Put the message into our spam archive ArchiveSpam( $archive, $raw_message ); $mail->finish(); $cleanmail->finish(); print "\n"; } # # Send a given message to a given address! # sub SendMail { my ( $from, $to, @msg ) = @_; if ( open( SM, "|/usr/sbin/sendmail -t -oi -oem" ) ) { # interactive, mail errors to sender print SM @msg; close(SM); } else { warn " -- couldn't open sendmail to send the mail!\n"; } } # # Save a given message into a given archive mail spool. # sub ArchiveSpam { my ( $archive, $message ) = @_; return unless ( $archive && defined $message && $message ); if ($debug) { print "(archi) Archive message (debugging disables)\n"; return; } if ( open( AR, ">>$archive" ) ) { print AR $message; close(AR); print "(archi) Message archived to $archive.\n"; } else { print "(archi) Unable to archive spam message in $archive.\n"; } } # # Send a given message (and cleanmsg) to a given list of recipients. # sub SendSpam { my ( $from, $msg, $cleanmsg, @to ) = @_; # Figure out a proper MIME boundary string my $boundary = "DeathToSpam"; $boundary .= "DeathToSpam" while ( ${$cleanmsg} =~ /$boundary/ ); foreach my $to (@to) { my @head = ( "From: $from", "To: $to", "Subject: Spam Report: " . $msg->get('Subject'), "X-Mailer: spamhandler (" . '$Id: handlespam 210 2005-09-27 00:29:31Z felicity $' . ")", "MIME-Version: 1.0", qq@Content-Type: multipart/mixed; boundary="$boundary"@, ); my @body = ( "This is a multi-part message in MIME format.\n", "--$boundary\n", "Content-Type: text/plain; charset=us-ascii\n", "Content-Transfer-Encoding: 7bit\n", "\n", "\n", "--$boundary\n", "Content-Type: message/rfc822\n", "Content-Disposition: attachment\n", "\n", ${$cleanmsg}, "--$boundary--\n" ); my $nmsg = new Mail::Internet( Header => new Mail::Header( \@head ), Body => \@body ); SendMail( $from, $to, $nmsg->as_string() ); } } # # Display the usage statement for the program! # sub Usage { my ($rv) = @_; print " Usage: $0 [ options ] [ ... ] Options: --help This help screen --spamcop Your email address for SpamCop. --maxspamcopsize The maximum sized message to report to SpamCop. If no input files are specified, handlespam will read in from STDIN. "; exit( defined $rv ? $rv : 0 ); } # Convert a string into a decimal number of bytes # accept "###.###[kmg]b?" ... # sub toBytes { my ($free) = @_; my ( $n, $u ); if ( $free =~ /^(\d+(?:\.\d+)?)([kmg])?b?$/i ) { ( $n, $u ) = ( $1, "\L$2" ); } else { return undef; } return ( int( $n * 1024 ) ) if ( $u eq "k" ); return ( int( $n * 1024 * 1024 ) ) if ( $u eq "m" ); return ( int( $n * 1024 * 1024 * 1024 ) ) if ( $u eq "g" ); int($n); } ########### Main section of code ########### my @message = (); my %blocks = (); my %relayChecked = (); my $help = 0; # Get the commandline options GetOptions( "spamcop=s" => \$spamcop, "maxspamcopsize=s" => \$maxSpamcopSize, "help" => \$help, ) || Usage(1); # Print out the usage if neccessary. Usage() if ($help); print "(debug) Debugging mode on, not doing anything externally ...\n" if ($debug); # If it's not just a number, let's convert to bytes... if ( defined $maxSpamcopSize && $maxSpamcopSize =~ /\D/ ) { $maxSpamcopSize = toBytes($maxSpamcopSize); } # # Main loop # It reads from STDIN/file and takes messages in Berkeley format (messages # seperated by "^From ", then processes them through the HandleSpam routine. # my $line = undef; do { $line = <>; # New message! if ( !defined $line || $line =~ /^From / ) { HandleSpam( \%blocks, \%relayChecked, @message ) if (@message); @message = ($line); } else { push ( @message, $line ); } } while ( defined $line ); # close out the bayes stuff $spamass->finish_learner(); if (@fullrelaylist) { foreach my $temprelay ( keys %relays ) { next unless ( $relays{$temprelay} == 2 ); my @head = ( "From: $fromAddr", "To: $temprelay", "Subject: Open Relays", "X-Mailer: spamhandler (" . '$Id: handlespam 210 2005-09-27 00:29:31Z felicity $' . ")", ); my @body = map { "Relay: $_\n" } @fullrelaylist; my $nmsg = new Mail::Internet( Header => new Mail::Header( \@head ), Body => \@body ); SendMail( $fromAddr, $temprelay, $nmsg->as_string() ); print "(relay) Full Listing Open Relay mail sent to $temprelay\n"; } } # Spit out the accessdb-stuff we should be doing print "\n"; # class C blocks print map { "$_ $blocks{$_}\n" } keys %blocks; # open relays print map { qq/$_ ERROR:5.7.1:"550 Rejected: We don't like open relay servers sending us spam!"\n/; } @fullrelaylist; # Everything's good, exit out now. :) exit 0;