#!/usr/bin/perl -w use strict; #use lib $ENV{'HOME'}.'/SA/spamassassin-head/lib'; # Requires SpamAssassin 3.x modules use Mail::SpamAssassin; use Mail::SpamAssassin::ArchiveIterator; use Mail::SpamAssassin::Message; use Mail::SpamAssassin::PerMsgStatus; use Mail::SpamAssassin::Util; my $debug = undef; my $showloc = 1; # Usage: $0 [mbox or maildir] ... # no argument or "-" will take input from STDIN # argv in mass-check format, so it'll also accept: # :mbox:/path/to/file # :dir:/path/to/maildir # Everything below here needs ArchiveIterator ... my $iter = new Mail::SpamAssassin::ArchiveIterator( { 'opt_j' => 0, # do it all in memory 'opt_n' => 1, # don't bother sorting 'opt_all' => 1, # grab all mails 'wanted_sub' => \&wanted, 'result_sub' => sub {}, } ); while ($ARGV[0] =~ /^-+(.+)/) { my $opt = $1; $showloc = 0 if (lc $opt eq 'noloc'); shift @ARGV; } push ( @ARGV, '-' ) if ( !@ARGV ); foreach ( @ARGV ) { # try to figure out the format unless (/:[a-z]+:/) { if (-d $_) { $_ = ":dir:$_"; } else { $_ = ":mbox:$_"; } } } my $sa = new Mail::SpamAssassin({ debug => $debug }); $sa->init(0); my %list = (); my %skip = map { $_ => 1 } qw/a_empty img/; # Go run the messages! $iter->run(@ARGV); sub wanted { my $name = $_[1]; my $dataref = $_[3]; my $msg = $sa->parse($dataref); # Remove the SA markup if it exists my $x_spam_status = $msg->get_header("X-Spam-Status"); if ($x_spam_status) { my $new_ma = $sa->parse($sa->remove_spamassassin_markup($msg)); $msg->finish(); $msg = $new_ma; } my $pms = Mail::SpamAssassin::PerMsgStatus->new($sa, $msg); my @list = (); # Generate the full list of html-parsed domains. my $parsed = $pms->get_uri_detail_list(); while (my($uri, $info) = each %{$parsed}) { $uri =~ tr/\r//d; next unless ($uri =~ /^https?:/); next unless ($info->{domains}); my $use = 0; if ($info->{types}->{a}) { # determine a vs a_empty foreach my $at (@{$info->{anchor_text}}) { if (length $at) { $use = 1; last; } } } elsif ($info->{types}->{form}) { $use = 1; } elsif ($info->{types}->{img}) { $use = 1; } next unless $use; push(@list, keys %{$info->{domains}}); } foreach (@list) { next if /^\d+\.\d+\.\d+\.\d+$/; if (!$list{$_}++) { print "$name " if $showloc; print "$_\n"; } } $pms->finish(); $msg->finish(); }