#!/usr/bin/perl # trim_whitelist # Mangled from check_whitelist from spamassassin-tools # Trims AWL files by removing the incredible bloat from one-off email addresses. use strict; use Fcntl; # must match line at top of lib/Mail/SpamAssassin/DBBasedAddrList.pm. # now off until 3.0 # BEGIN { @AnyDBM_File::ISA = qw(DB_File GDBM_File NDBM_File SDBM_File); } use AnyDBM_File ; use DB_File; use vars qw( %oldawl %newawl $totalscore @keys @stat $uid $gid) ; my $db; my $newdb; if ($#ARGV == -1) { $db = $ENV{HOME}."/.spamassassin/auto-whitelist"; $newdb = $ENV{HOME}."/.spamassassin/auto-whitelist-new"; } else { $db = $ARGV[0]; $newdb = $ARGV[0]."-new"; } # copy old db to backup before doing anything # We copy instead of moving to keep the db live while we're processing. qx { cp $db $db-old }; #tie %oldawl, "AnyDBM_File",$db, O_RDONLY, 0600 # or die "Cannot open file $db: $!\n"; tie %oldawl, "DB_File",$db, O_RDONLY,0600 or die "Cannot open file $db: $!\n"; #tie %newawl, "AnyDBM_File",$newdb, O_RDWR|O_CREAT, 0600 # or die "Cannot open file $newdb: $!\n"; tie %newawl, "DB_File",$newdb, O_RDWR|O_CREAT, 0600 or die "Cannot open file $newdb: $!\n"; my @keys = grep(!/totscore$/,keys(%oldawl)); my $totentries = 0; my $keptentries = 0; for my $key (@keys) { $totentries++; my $totalscore = $oldawl{"$key|totscore"}; my $count = $oldawl{$key}; if(defined($totalscore)) { # If the count is 1, discard the entry, otherwise store the entry in the new db. if ($count != 1) { $keptentries++; $newawl{$key} = $count; $newawl{"$key|totscore"} = $totalscore; } # printf "% 8.1f %15s -- %s\n", # $totalscore/$count, (sprintf "(%.1f/%d)",$totalscore/$count,$count), # $key; } } untie %oldawl; untie %newawl; print "$totentries entries in old database.\n". "Kept $keptentries, deleted ".($totentries-$keptentries).".\n"; if ($< == 0) { print "Getting correct UID/GID from original file...\n"; # We need this to make sure the new db is properly RW for the user that owns it. # Do this BEFORE copying the new over the old! # Note this is ONLY required while running as root- other users will get the proper # ownership. @stat = stat "$db"; $uid = $stat[4]; $gid = $stat[5]; } # Copy the new db over the old qx { mv -f $newdb $db }; if ($< == 0) { print "Setting correct ownership on new db...\n"; # And finally make sure the new db is owned by the right user. # DON'T chown the -old.db file- that just makes the problem worse! chown $uid, $gid, "$db"; } else { print "You may want to delete the old AWL database\n". "(~/.spamassassin/auto-whitelist-old) as it's probably\n". " taking up quite a bit of disk space.\n"; }