#!/bin/sh
#set -x
###############################################################################
#
# File: mail_filter
# RCS: $Header: /home/hugh/sources/misc/filter/mail_filter,v 1.14 2003/04/02 16:46:44 hugh Exp hugh $
# Description: stores e-mail in different mailboxes depending on
# information in the header (From, Received, etc.)
# Author: Hugh Mahon
#
###############################################################################
PATH=/bin:/usr/bin:/usr/local/bin
##
#
# This script takes a single e-mail message from stdin, places it in
# a file, then greps the message header for the 'sender', 'to', and
# 'received' information. The script then greps files in the
# user's home directory to determine what file to place the e-mail in.
# If the grep doesn't find a match in any of the files, it will place
# the message in the default mailbox, which was passed in as a
# command line argument.
#
# The files accessed in the home directory are:
# $HOME/.spammers - known spammers
# $HOME/.safe_senders - suspected 'safe' senders
# $HOME/.spam_words - words in subject used by spammers
# $HOME/.senders - mail-list file - 'From '
# $HOME/.received - mail-list file - 'Received: '
# $HOME/.to - mail-list file - 'To: '
# $HOME/.listpost - mail-list file - 'List-Post: '
# $HOME/.mail_filter.rc - rc file for this script
#
# The format of the .spammers file is a list of complete or partial
# e-mail addresses of known spammers. This mail is simply thrown away.
#
# The format of the .safe_senders file is again a list of complete or
# partial e-mail addresses of addresses that are expected to be free
# of spam.
#
# The format of the .spam_words file is a list of words or phrases
# that are usually only in spam but some of which may be in legitimate
# messages (e.g., 'card' is part of "Credit Card" but may be in
# non-spam e-mail as part of another word or phrase). This mail is
# not discarded, but put in a spam file for possible later perusal.
#
# The format of each mail-list file is:
#
# matching-string path-to-mailbox-file
#
# where a space (' ') separates the string to be matched (either
# sender, to, or received) from the mailbox path. An absolute path
# is best used to achieve desired results.
#
# The file ~/.mail_filter.rc is used to set values for files where
# information is to be sent/filtered. The current values are:
#
# spammers the file in which to record that new mail from
# a known spammer came in
# spam_file name and location of file to store suspected spam
#
# The contents of the ~/.mail_filter.rc file would look like this:
#
# spammers=/home/user/mail_files/spammers
# spam_file=/home/user/mail_files/spam-file
#
# The path specified should be the full path to the files.
#
# The default paths are $HOME/spammers and $HOME/spam_file
#
##
# Receives mail delivered via SMTP/sendmail; Depending on information
# in the header saves in a designated mailbox or the default mailbox
# which is specified in arg1 ($1). The second argument is a filename
# for a vacation script. If the vacation file exists and is executable,
# mail that matched no filter rule is piped into it for processing
# instead of being saved in the default mailbox.
##
# Usage:
# mail_filter default_mailbox vacation_script
# default_mailbox -- default mailbox if no matches are found
# vacation_script -- a script which takes e-mail as stdin
# and sends a reply to the sender with
# a vacation message
##
# Invocation:
# mail_filter is invoked by sendmail when a .forward file exists
# in your $HOME directory containing the following:
#"| /full/path/to/mail_filter_script arguments ..."
##
# Details:
#
# o mail_filter reads stdin, which should be a single mail message;
# this is set up by sendmail if $HOME/.forward contains the line
# above; remember to use double quotes around the whole line and
# full path names for everything, or $HOME to reference your home
# directory ("~" won't work!)
#
# o either appends the message onto the MAILFILE (first argument),
# or places e-mail in file assigned to MAILFILE according to
# match in 'if' statements. Other actions could also be configured.
#
##
# BUGS:
# There is a sendmail defect which prevents vacation from being
# executed if .forward is greater in length than 140 bytes (or so).
# The defect manifests itself by a 'User unknown' message.
# Make sure your .forward file is no longer than necessary.
#
##
# DEBUGGING:
# make sure .forward exists and is readable;
# make sure mail_filter exists and is executable;
# verify that your system does use sendmail;
# try calling the script from your shell using "sh -x" and
# typing in a mail message;
# Reading sendmail's delivery log (/usr/spool/mqueue/syslog) can
# also shed light on problems if they still occur.
##
# Silence the script: everything written to stdout or stderr from here
# on (by this script or by the commands it runs) is discarded.
exec >/dev/null 2>&1
#set -x
## BEGIN script
# Establish the account name and home directory, and pick up the
# optional vacation script from the command line.
##
# Login name: $LOGNAME when it is set, otherwise whatever whoami reports.
USER=${LOGNAME:-$(whoami)}
# $HOME is not guaranteed to be set, so fall back to the sixth
# colon-separated field of this user's /etc/passwd entry.
if [ -z "$HOME" ]
then
  HOME=$(grep -E "^$USER:" /etc/passwd | cut -d: -f6)
fi
export HOME
VAC_FILE=$2 # vacation flag/command file (second argument)
# Read the per-user rc file, $HOME/.mail_filter.rc, which may override
# where spammer sightings are logged (spammers=PATH) and where suspected
# spam is stored (spam_file=PATH).

# rc_value KEY FILE
#   Print the value of the first "KEY=value" line in FILE (leading
#   blanks before KEY are tolerated); print nothing if there is none.
#   The match is anchored to the start of the line so that comment
#   lines, or another key whose value merely contains KEY, are ignored,
#   and only the "KEY=" prefix is removed so values containing '='
#   survive intact.
rc_value() {
  sed -n -e "s/^[[:space:]]*$1=//p" "$2" | sed -e 1q
}

if [ -f "$HOME/.mail_filter.rc" ]
then
  spammers=$(rc_value spammers "$HOME/.mail_filter.rc")
  spam_file=$(rc_value spam_file "$HOME/.mail_filter.rc")
fi
# Anything the rc file did not provide falls back to a file in $HOME.
spammers=${spammers:-$HOME/spammers}
spam_file=${spam_file:-$HOME/spam_file}
# Command search path for the rest of the run, now that $HOME is known.
PATH=/bin:/usr/bin:/usr/local/bin:$HOME/bin
NAME=$(basename "$0") # script name without its directory
# Set up file arguments -- if the default mailbox was not passed on the
# command line, make something up (not good, but it beats losing
# incoming mail ...)
DEFFILE=${1:-$HOME/vac_mbox} # file to save incoming mail
umask 077 # be protective about everything created from here on
# Create both working files with unpredictable names. A fixed
# /tmp/.vac.$$ style name in a world-writable directory can be
# pre-created (or symlinked) by another user before it is written.
if TMPFILE=$(mktemp /tmp/.vac.XXXXXX) && TMP2=$(mktemp /tmp/.v.XXXXXX)
then
  :
else
  # mktemp is missing or failed: fall back to the historical names,
  # removing any stale or partially created files first.
  rm -f "$TMPFILE" "$TMP2"
  TMPFILE=/tmp/.vac.$$ # temporary working file (incoming)
  TMP2=/tmp/.v.$$ # second temporary file
  rm -f "$TMPFILE" "$TMP2"
fi
cat > "$TMPFILE" # save the incoming message and
# determine who the Sender was:
# Build $TMP2 as a headers-only copy of the message so that the header
# lookups that follow see only one 'From ' and one 'Subject: ' line and
# cannot be fooled by look-alike lines in the body. $TMPFILE itself is
# left untouched and still holds the complete message.

# headers_of FILE
#   Print FILE up to, but not including, its first empty line (the
#   separator between mail header and body). A FILE without an empty
#   line is printed in full.
headers_of() {
  sed -e '/^$/,$d' "$1"
}

headers_of "$TMPFILE" > "$TMP2"
# Pull the header fields used for filtering out of $TMP2.

# header_value NAME FILE
#   Print, one per line, every line of FILE that starts with "NAME: ",
#   with the first "NAME: " on the line removed.
header_value() {
  grep -E "^$1: " "$2" | sed -e "s/$1: //"
}

#diffserv="`egrep -i 'diffserv@external.cisco.com' $TMP2`"
From=$(header_value From "$TMP2")
Subject=$(header_value Subject "$TMP2")
# Envelope sender: the first word after "From " on the mbox separator line.
Sender=$(grep -E '^From ' "$TMP2" | sed -e 's/From \([^ ]*\).*$/\1/')
# The header name is matched case-insensitively, but only the exact
# spelling "To: " is stripped from the value.
To=$(grep -E -i '^To: ' "$TMP2" | sed -e 's/To: //')
Received=$(grep -E '^Received: ' "$TMP2" | sed -e 's/Received: from //g')
# The sed expression used here previously was an unterminated 's'
# command, so Listpost was always empty. Only the header name is removed
# now, as for the other fields.
# NOTE(review): confirm the entries in ~/.listpost are written to match
# the remaining text (e.g. "<mailto:list@host>").
Listpost=$(header_value List-Post "$TMP2")
Special=$(grep -E '^X-hello: ' "$TMP2" | sed -e 's/X-hello: //g')
Special2=$(grep -E '^X-zeta: ' "$TMP2" | sed -e 's/X-zeta: //g')
MAILFILE="" # destination mailbox; empty means "not sorted yet"
# Take the per-user lock so that this run does not interfere with
# another instance of mail_filter. 'locker' is an external command
# looked up on $PATH; it is re-run once a second for as long as it
# prints anything.
# NOTE(review): presumably output from locker means the lock is held by
# someone else -- confirm against locker itself.
lock_file=$HOME/.filter_lock
until [ -z "$(locker $lock_file)" ]
do
  sleep 1
done
# The purpose of this script is to filter e-mail into mailboxes for keeping
# things relatively clean. To do this, the script will check information to
# determine placement of the incoming e-mail.

# value_in_list VALUE PATTERNFILE
#   Succeed when VALUE matches, ignoring case, at least one pattern in
#   PATTERNFILE (one grep pattern per line) and the matching text is not
#   empty. VALUE comes from untrusted mail headers, so it is passed
#   along quoted: it must never be word-split or expanded as a filename
#   pattern by the shell.
value_in_list() {
  [ -n "$(printf '%s\n' "$1" | grep -i -f "$2")" ]
}

# filter_done
#   Remove both temporary files and the lock file, then end the script.
filter_done() {
  rm -f "$TMPFILE" "$TMP2" "$lock_file"
  exit
}

# is this from a known spammer?
# Spammers often put crap in the "From " field but sometimes have
# repeatable info in the "From: " field, so both are checked.
if [ -f "$HOME/.spammers" ]
then
  if value_in_list "$Sender" "$HOME/.spammers" ||
    value_in_list "$From" "$HOME/.spammers"
  then
    # If this was from a spammer, we don't want this stinking
    # mail! Log the sighting, delete the message and exit. This of
    # course could be modified to send the e-mail to a spam file, but
    # right now I don't want to waste the disk space!
    echo "$Sender" $(date) >> "$spammers"
    filter_done
  fi
fi

# check to see if this is from a supposedly safe origin, e.g., is the
# address it is coming from a known address, or from a supposedly safe
# domain?
# NOTE: It is a good idea to have this AFTER checking for known
# spammer addresses or domains since they sometimes include the
# recipient's address in the sending address. They're spammers, using
# any other derogatory adjectives would simply be redundant.
supposed_safe=""
if [ -f "$HOME/.safe_senders" ] &&
  value_in_list "$Sender" "$HOME/.safe_senders"
then
  supposed_safe="yes"
fi

# If the address in the e-mail is not considered to be a safe address,
# we check the subject, and then the whole message, for known words that
# are unlikely to be in legitimate e-mail. If the mail is suspected of
# being spam it is placed in a spam bucket for possible later examination.
#
if [ -z "$supposed_safe" ] && [ -f "$HOME/.spam_words" ]
then
  if value_in_list "$Subject" "$HOME/.spam_words" ||
    [ -n "$(grep -i -f "$HOME/.spam_words" "$TMPFILE")" ]
  then
    # if we suspect this is spam, just bucket it (followed by an
    # empty line) and don't waste any more cycles on it
    MAILFILE=$spam_file
    cat < "$TMPFILE" >> "$MAILFILE" ; echo >> "$MAILFILE"
    filter_done
  fi
fi
# Mailing-list sorting: try the sender, received, to and list-post map
# files in that order and take the mailbox from the first one that
# yields a match. Each map line is "matching-string path-to-mailbox".

# lookup_mailbox KEY MAPFILE [nocase]
#   Print the mailbox (second space-separated field) of the first line
#   in MAPFILE that contains KEY followed by a space; KEY is used as a
#   grep pattern. Prints nothing when KEY is empty or MAPFILE is
#   missing: an empty KEY would otherwise turn the pattern into a lone
#   space and match every entry. Only the first hit is reported so the
#   result is always a single path. Any third argument makes the match
#   case-insensitive.
lookup_mailbox() {
  [ -n "$1" ] || return 0
  [ -f "$2" ] || return 0
  grep ${3:+-i} -e "$1 " "$2" | cut -d' ' -f 2 | sed -e 1q
}

if [ -z "$MAILFILE" ]
then
  MAILFILE=$(lookup_mailbox "$Sender" "$HOME/.senders" nocase)
fi
if [ -z "$MAILFILE" ]
then
  MAILFILE=$(lookup_mailbox "$Received" "$HOME/.received")
fi
if [ -z "$MAILFILE" ]
then
  MAILFILE=$(lookup_mailbox "$To" "$HOME/.to")
fi
if [ -z "$MAILFILE" ]
then
  MAILFILE=$(lookup_mailbox "$Listpost" "$HOME/.listpost")
fi
# Mail that no rule has sorted yet may carry an X-hello or X-zeta
# header. If so, start $HOME/bin/autocall in the background, detached
# with nohup, passing the header value followed by a mode word: "http"
# for X-hello, "sock" for X-zeta. X-hello wins when both are present.
# NOTE(review): the header value is expanded unquoted, so it is
# word-split (and glob-expanded) into autocall's arguments -- confirm
# this is intended for data taken from incoming mail.
autocall_arg=""
autocall_mode=""
if [ -z "$MAILFILE" ]
then
  if [ -n "$Special" ]
  then
    autocall_arg=$Special
    autocall_mode=http
  elif [ -n "$Special2" ]
  then
    autocall_arg=$Special2
    autocall_mode=sock
  fi
fi
if [ -n "$autocall_mode" ]
then
  nohup ${HOME}/bin/autocall ${autocall_arg} $autocall_mode > /dev/null 2>&1 &
fi
# Wherever the message ends up, also feed it to ical when it is an
# Outlook scheduling message, recognised by the '*~*~*~...' marker line
# anywhere in the saved message. The converter's output is appended to
# $HOME/.calendar.
outlook=$(grep '\*~\*~\*~\*~\*~\*~\*~\*~\*~\*' $TMPFILE)
[ -z "$outlook" ] || ${HOME}/bin/ol2ical -f $TMPFILE >> ${HOME}/.calendar
# Other sorting steps, if desired, can tell whether a sort was already
# made from the value of $MAILFILE: it is non-blank once a rule matched.
#
# Unsorted mail goes to the vacation script when one is installed and
# executable; the script then cleans up and stops right here.
# NOTE(review): on this path the message is handed to the vacation
# script only and is never appended to $DEFFILE -- confirm that the
# vacation script is expected to store it.
if [ -z "$MAILFILE" ] && [ -x "$VAC_FILE" ]
then
  cat < $TMPFILE | $VAC_FILE
  rm -f $TMPFILE $TMP2 $lock_file
  exit 0
fi
# Still unsorted: fall back to the default mailbox.
MAILFILE=${MAILFILE:-$DEFFILE}
##
# SANITY CHECKS!
##
# A destination mailbox that does not exist yet is created, made
# readable and writable by owner and group only, and handed to group
# 'mail'. Because a brand-new mailbox is unusual, a spare copy of the
# message (plus an empty line) is also appended to
# $HOME/mail_filter.save.
if [ -n "$MAILFILE" ]
then
  if [ ! -f $MAILFILE ]
  then
    touch $MAILFILE
    chmod 660 $MAILFILE
    chgrp mail $MAILFILE
    #just in case...
    { cat < $TMPFILE ; echo ; } >> $HOME/mail_filter.save
  fi
fi
# Final delivery.
if [ -n "$MAILFILE" ]
then
  # a lock mechanism for use with 'elm': poll once a second for as
  # long as the long listing of /tmp/snd.* contains a line mentioning
  # $USER
  while
    string=$(ls -l /tmp/snd.* | grep $USER)
    [ -n "$string" ]
  do
    sleep 1
  done
  # save the message in the MAILFILE for later reading;
  # the extra empty line keeps all mailers happy
  { cat < $TMPFILE ; echo ; } >> $MAILFILE
fi
# Remove both temporary files and release the lock.
rm -f $TMPFILE $TMP2 $lock_file
exit 0