# -*- Perl -*-
#***********************************************************************
#
# mimedefang-filter
#
# Suggested minimum-protection filter for Microsoft Windows clients, plus
# SpamAssassin checks if SpamAssassin is installed.
#
# Copyright (C) 2002 Roaring Penguin Software Inc.
#
# This program may be distributed under the terms of the GNU General
# Public License, Version 2, or (at your option) any later version.
#
# $Id: suggested-minimum-filter-for-windows-clients,v 1.65 2003/07/03 23:56:05 dfs Exp $
#***********************************************************************

#Last Modified by KAM: 01-16-04
# Network tests are on by default
# AWL Use is on by default
# Bad Exts bugfix
# Vexira Support Added (though not recommended to use ;-) )
# SpamAssassin implementation differences to mimic more of SpamAssassin's features
# Removing HTML emails is not the default

#***********************************************************************
# Set administrator's e-mail address here.  The administrator receives
# quarantine messages and is listed as the contact for site-wide
# MIMEDefang policy.  A good example would be 'defang-admin@mydomain.com'
#***********************************************************************
$AdminAddress = 'postmaster@localhost';
$AdminName = "MIMEDefang Administrator's Full Name";

#***********************************************************************
# Set the e-mail address from which MIMEDefang quarantine warnings and
# user notifications appear to come.  A good example would be
# 'mimedefang@mydomain.com'.  Make sure to have an alias for this
# address if you want replies to it to work.
#***********************************************************************
$DaemonAddress = 'mimedefang@localhost';

#***********************************************************************
# If you set $AddWarningsInline to 1, then MIMEDefang tries *very* hard
# to add warnings directly in the message body (text or html) rather
# than adding a separate "WARNING.TXT" MIME part.  If the message
# has no text or html part, then a separate MIME part is still used.
#***********************************************************************
$AddWarningsInline = 0;

#***********************************************************************
# To enable syslogging of virus and spam activity, add the following
# to the filter:
# md_graphdefang_log_enable();
# You may optionally provide a syslogging facility by passing an
# argument such as:  md_graphdefang_log_enable('local4');  If you do this, be
# sure to setup the new syslog facility (probably in /etc/syslog.conf).
# An optional second argument causes a line of output to be produced
# for each recipient (if it is 1), or only a single summary line
# for all recipients (if it is 0.)  The default is 1.
# Comment this line out to disable logging.
#***********************************************************************
md_graphdefang_log_enable('mail', 1);

#***********************************************************************
# Uncomment this to block messages with more than 50 parts.  This will
# *NOT* work unless you're using Roaring Penguin's patched version
# of MIME tools, version MIME-tools-5.411a-RP-Patched-02 or later.
#
# WARNING: DO NOT SET THIS VARIABLE unless you're using at least
# MIME-tools-5.411a-RP-Patched-02; otherwise, your filter will fail.
#***********************************************************************
# $MaxMIMEParts = 50;

#***********************************************************************
# Set various stupid things your mail client does below.
#***********************************************************************

# Set the next one if your mail client cannot handle nested multipart
# messages.  DO NOT set this lightly; it will cause action_add_part to
# work rather strangely.  Leave it at zero, even for MS Outlook, unless
# you have serious problems.
$Stupidity{"flatten"} = 0;

# Set the next one if your mail client cannot handle multiple "inline"
# parts.
$Stupidity{"NoMultipleInlines"} = 0;

# The next lines force SpamAssassin modules to be loaded and rules
# to be compiled immediately.  This may improve performance on busy
# mail servers.  Comment the lines out if you don't like them.
if ($Features{"SpamAssassin"}) {
    #$SALocalTestsOnly = 1;  # I do NOT want network tests
    $SALocalTestsOnly = 0;  # I DO want network tests

    spam_assassin_init()->compile_now(1) if defined(spam_assassin_init());

    # If you want to use auto-whitelisting:
    if (defined($SASpamTester)) {
       use Mail::SpamAssassin::DBBasedAddrList;
       my $awl = Mail::SpamAssassin::DBBasedAddrList->new();
       $SASpamTester->set_persistent_address_list_factory($awl) if defined($awl);
    }
}

# This procedure returns true for entities with bad filenames.
sub filter_bad_filename ($) {
    my($entity) = @_;
    my($bad_exts, $re);

    # Bad extensions
    $bad_exts = '(ade|adp|app|asd|asf|asx|bas|bat|chm|cmd|com|cpl|crt|dll|exe|fxp|hlp|hta|hto|inf|ini|ins|isp|jse?|lib|lnk|mdb|mde|msc|msi|msp|mst|ocx|pcd|pif|prg|reg|scr|sct|sh|shb|shs|sys|url|vb|vbe|vbs|vcs|vxd|wmd|wms|wmz|wsc|wsf|wsh|\{[^\}]+\})';

    # Do not allow:
    # - CLSIDs  {foobarbaz}
    # - bad extensions (possibly with trailing dots) at end
    $re = '\.' . $bad_exts . '\.*$';
    return re_match($entity, $re);
}

# Scan for a virus using the first supported virus scanner we find.
sub message_contains_virus () {
    return message_contains_virus_avp()      if ($Features{'Virus:AVP'});
    return message_contains_virus_fprot()    if ($Features{'Virus:FPROT'});
    return message_contains_virus_fsav()     if ($Features{'Virus:FSAV'});
    return message_contains_virus_hbedv()    if ($Features{'Virus:HBEDV'});
    return message_contains_virus_nai()      if ($Features{'Virus:NAI'});
    return message_contains_virus_nvcc()     if ($Features{'Virus:NVCC'});
    return message_contains_virus_rav()      if ($Features{'Virus:RAV'});
    return message_contains_virus_sophie()   if ($Features{'Virus:SOPHIE'});
    return message_contains_virus_trophie()  if ($Features{'Virus:TROPHIE'});
    return message_contains_virus_sophos()   if ($Features{'Virus:SOPHOS'});
    return message_contains_virus_trend()    if ($Features{'Virus:TREND'});
    return message_contains_virus_filescan() if ($Features{'Virus:FileScan'});
    return message_contains_virus_clamd()    if ($Features{'Virus:CLAMD'});
    return message_contains_virus_clamav()   if ($Features{'Virus:CLAMAV'});
    return message_contains_virus_vexira()   if ($Features{'Virus:VEXIRA'});
    return message_contains_virus_carrier_scan() if ($Features{'Virus:SymantecCSS'});
    return (wantarray ? (0, 'ok', 'ok') : 0);
}

# Scan for a virus using the first supported virus scanner we find.
sub entity_contains_virus ($) {
    my($e) = @_;
    return entity_contains_virus_avp($e)      if ($Features{'Virus:AVP'});
    return entity_contains_virus_fprot($e)    if ($Features{'Virus:FPROT'});
    return entity_contains_virus_fsav($e)     if ($Features{'Virus:FSAV'});
    return entity_contains_virus_hbedv($e)    if ($Features{'Virus:HBEDV'});
    return entity_contains_virus_nai($e)      if ($Features{'Virus:NAI'});
    return entity_contains_virus_nvcc($e)     if ($Features{'Virus:NVCC'});
    return entity_contains_virus_rav($e)      if ($Features{'Virus:RAV'});
    return entity_contains_virus_sophie($e)   if ($Features{'Virus:SOPHIE'});
    return entity_contains_virus_trophie($e)  if ($Features{'Virus:TROPHIE'});
    return entity_contains_virus_sophos($e)   if ($Features{'Virus:SOPHOS'});
    return entity_contains_virus_trend($e)    if ($Features{'Virus:TREND'});
    return entity_contains_virus_filescan($e) if ($Features{'Virus:FileScan'});
    return entity_contains_virus_clamd($e)    if ($Features{'Virus:CLAMD'});
    return entity_contains_virus_clamav($e)   if ($Features{'Virus:CLAMAV'});
    return entity_contains_virus_vexira($e)   if ($Features{'Virus:VEXIRA'});
    return entity_contains_virus_carrier_scan($e) if ($Features{'Virus:SymantecCSS'});
    return (wantarray ? (0, 'ok', 'ok') : 0);
}

#***********************************************************************
# %PROCEDURE: filter_begin
# %ARGUMENTS:
#  None
# %RETURNS:
#  Nothing
# %DESCRIPTION:
#  Called just before e-mail parts are processed
#***********************************************************************
sub filter_begin () {
    # ALWAYS drop messages with suspicious chars in headers
    if ($SuspiciousCharsInHeaders) {
        md_graphdefang_log('suspicious_chars');
	action_quarantine_entire_message("Message quarantined because of suspicious characters in headers");
	# Do NOT allow message to reach recipient(s)
	return action_discard();
    }

    # Scan for viruses if any virus-scanners are installed
    my($code, $category, $action) = message_contains_virus();

    # Lower level of paranoia - only looks for actual viruses
    $FoundVirus = ($category eq "virus");

    # Higher level of paranoia - takes care of "suspicious" objects
    # $FoundVirus = ($action eq "quarantine");

    if ($action eq "tempfail") {
	action_tempfail("Problem running virus-scanner");
	md_syslog('warning', "Problem running virus scanner: code=$code, category=$category, action=$action");
    }
}

#***********************************************************************
# %PROCEDURE: filter
# %ARGUMENTS:
#  entity -- a Mime::Entity object (see MIME-tools documentation for details)
#  fname -- the suggested filename, taken from the MIME Content-Disposition:
#           header.  If no filename was suggested, then fname is ""
#  ext -- the file extension (everything from the last period in the name
#         to the end of the name, including the period.)
#  type -- the MIME type, taken from the Content-Type: header.
#
#  NOTE: There are two likely and one unlikely place for a filename to
#  appear in a MIME message:  In Content-Disposition: filename, in
#  Content-Type: name, and in Content-Description.  If you are paranoid,
#  you will use the re_match and re_match_ext functions, which return true
#  if ANY of these possibilities match.  re_match checks the whole name;
#  re_match_ext checks the extension.  See the sample filter below for usage.
# %RETURNS:
#  Nothing
# %DESCRIPTION:
#  This function is called once for each part of a MIME message.
#  There are many action_*() routines which can decide the fate
#  of each part; see the mimedefang-filter man page.
#***********************************************************************
sub filter ($$$$) {
    my($entity, $fname, $ext, $type) = @_;

    return if message_rejected(); # Avoid unnecessary work

    # Block message/partial parts
    if (lc($type) eq "message/partial") {
        md_graphdefang_log('message/partial');
	action_bounce("MIME type message/partial not accepted here");
	return action_discard();
    }

    # Virus scan
    if ($FoundVirus) {
	my($code, $category, $action);
	$VirusScannerMessages = "";
	($code, $category, $action) = entity_contains_virus($entity);
	# If you are more paranoid, change to: if ($action eq "quarantine") {
	if ($category eq "virus") {
            md_graphdefang_log('virus',$VirusName, $RelayAddr);

	    # Bounce the mail!
	    action_bounce("Virus $VirusName found in mail - rejected");

	    # But quarantine the part for examination later.  Comment
	    # the next line out if you don't want to bother.
	    action_quarantine($entity, "A known virus was discovered and deleted.  Virus-scanner messages follow:\n$VirusScannerMessages\n\n");

	    return;
	}
	if ($action eq "tempfail") {
	    action_tempfail("Problem running virus-scanner");
	    md_syslog('warning', "Problem running virus scanner: code=$code, category=$category, action=$action");
	}
    }

    if (filter_bad_filename($entity)) {
        md_graphdefang_log('bad_filename', $fname, $type);
	return action_quarantine($entity, "An attachment named $fname was removed from this document as it\nconstituted a security hazard.  If you require this document, please contact\nthe sender and arrange an alternate means of receiving it.\n");
    }

    # eml is bad if it's not multipart
    if (re_match($entity, '\.eml')) {
        md_graphdefang_log('non_multipart');
	return action_quarantine($entity, "A non-multipart attachment named $fname was removed from this document as it\nconstituted a security hazard.  If you require this document, please contact\nthe sender and arrange an alternate means of receiving it.\n");
    }
    # Clean up HTML if Anomy::HTMLCleaner is installed.
    if ($Features{"HTMLCleaner"}) {
	if ($type eq "text/html") {
	    return anomy_clean_html($entity);
	}
    }

    return action_accept();
}

#***********************************************************************
# %PROCEDURE: filter_multipart
# %ARGUMENTS:
#  entity -- a Mime::Entity object (see MIME-tools documentation for details)
#  fname -- the suggested filename, taken from the MIME Content-Disposition:
#           header.  If no filename was suggested, then fname is ""
#  ext -- the file extension (everything from the last period in the name
#         to the end of the name, including the period.)
#  type -- the MIME type, taken from the Content-Type: header.
# %RETURNS:
#  Nothing
# %DESCRIPTION:
#  This is called for multipart "container" parts such as message/rfc822.
#  You cannot replace the body (because multipart parts have no body),
#  but you should check for bad filenames.
#***********************************************************************
sub filter_multipart ($$$$) {
    my($entity, $fname, $ext, $type) = @_;

    if (filter_bad_filename($entity)) {
        md_graphdefang_log('bad_filename', $fname, $type);
	action_notify_administrator("A MULTIPART attachment of type $type, named $fname was dropped.\n");
	return action_drop_with_warning("An attachment of type $type, named $fname was removed from this document as it\nconstituted a security hazard.  If you require this document, please contact\nthe sender and arrange an alternate means of receiving it.\n");
    }

    # eml is bad if it's not message/rfc822
    if (re_match($entity, '\.eml') and ($type ne "message/rfc822")) {
        md_graphdefang_log('non_rfc822',$fname);
	return action_drop_with_warning("A non-message/rfc822 attachment named $fname was removed from this document as it\nconstituted a security hazard.  If you require this document, please contact\nthe sender and arrange an alternate means of receiving it.\n");
    }

    # Block message/partial parts
    if (lc($type) eq "message/partial") {
        md_graphdefang_log('message/partial');
	action_bounce("MIME type message/partial not accepted here");
	return;
    }

    return action_accept();
}


#***********************************************************************
# %PROCEDURE: defang_warning
# %ARGUMENTS:
#  oldfname -- the old file name of an attachment
#  fname -- the new "defanged" name
# %RETURNS:
#  A warning message
# %DESCRIPTION:
#  This function customizes the warning message when an attachment
#  is defanged.
#***********************************************************************
sub defang_warning ($$) {
    my($oldfname, $fname) = @_;
    return
	"An attachment named '$oldfname' was converted to '$fname'.\n" .
	"To recover the file, right-click on the attachment and Save As\n" .
	"'$oldfname'\n";
}

# If SpamAssassin found SPAM, append report.  We do it as a separate
# attachment of type text/plain
sub filter_end ($) {
    my($entity) = @_;

    # If you want quarantine reports, uncomment next line
    # send_quarantine_notifications();

    # IMPORTANT NOTE:  YOU MUST CALL send_quarantine_notifications() AFTER
    # ANY PARTS HAVE BEEN QUARANTINED.  SO IF YOU MODIFY THIS FILTER TO
    # QUARANTINE SPAM, REWORK THE LOGIC TO CALL send_quarantine_notifications()
    # AT THE END!!!

    # No sense doing any extra work
    return if message_rejected();

    # Spam checks if SpamAssassin is installed
    if ($Features{"SpamAssassin"}) {
	if (-s "./INPUTMSG" < 100*1024) {
	    # Only scan messages smaller than 100kB.  Larger messages
	    # are extremely unlikely to be spam, and SpamAssassin is
	    # dreadfully slow on very large messages.
	    my($hits, $req, $names, $report) = spam_assassin_check();

            # Modified to add headers hopefully identical (as close as possible) to spamassassin default -- KAM 07-09-03
            #
            # SpamAssassin modifies three headers X-Spam-Status, X-Spam-Level & X-Spam-Checker-Version.  These are edited 
            # regardless of positive or negative SPAM status

            # X-Spam-Status: No, hits=1.4 required=7.0
            #	tests=AWL,FROM_HAS_MIXED_NUMS,HTML_40_50,MAILTO_TO_SPAM_ADDR,MISSING_OUTLOOK_NAME
	    #   version=2.55
            action_change_header("X-Spam-Status", &build_status_line($hits, $req, $names, $report));

	    # X-Spam-Level: *
            # SpamAssassin can show up to 100 *'s
            my ($score);
            if ($hits < 100) {
              $score = "*" x int($hits);
            } else {
              $score = "*" x 100;
            }
            my ($extra);
            unless ($score) {
              $score = " ";
            }
            action_change_header("X-Spam-Level", "$score");

            # X-Spam-Checker-Version: SpamAssassin 2.55 (1.174.2.19-2003-05-19-exp)
            # No warnings to STDERR using $^W.  Gets rid of logged notes caused by blank variable names below
            $^W = 0;
            action_change_header("X-Spam-Checker-Version", "SpamAssassin $Mail::SpamAssassin::VERSION$Mail::SpamAssassin::EXTRA_VERSION ($Mail::SpamAssassin::SUB_VERSION)");
            $^W = 1;

	    if ($hits >= $req) {
                md_graphdefang_log('spam', $hits, $RelayAddr);

                #MIMEDEFANG DEFAULT HEADER -- SHOWS UP TO 40 *'s
                $score = substr($score,0,40);
                action_change_header("X-Spam-Score", "$hits ($score) $names");

		# If you find the SA report useful, add it, I guess...
                # No longer needed. KAM 10-29-2003. Using Report_safe-esque routine below
		#action_add_part($entity, "text/plain", "-suggest", "$report\n", "SpamAssassinReport.txt", "inline");

		# Old SpamAssassin Subject Change Rule
                # Change Subject: header
                action_change_header("Subject", "*****SPAM***** $Subject");


		# BEGIN REPORTSAFE - KAM 10-29-2003 
                # Implement Report_Safe -- Thanks to James W. Curtis for the original starting code!
                my ($container, $parser, $original, $report2);

                $container = MIME::Entity->build(Type => 'message/rfc822', Description => 'Original message before MIMEDefang', Data => [ "" ]);
		$parser = new MIME::Parser;
		open(IN, '< INPUTMSG');
	        $original = $parser->parse(\*IN);
	        close(IN);

	        $original->head()->replace('X-Relay-Addr', $RelayAddr);
        	$RelayHostname ||= 'N/A';
	        $original->head()->replace('X-Relay-Host', $RelayHostname);
        	$original->head()->replace('X-Relay-Time', scalar(localtime));

	        # Add the original message to the container
        	$container->add_part($original);

	        $report2 = MIME::Entity->build(Type => 'text/plain', Description => 'SpamAssassin Warning', Data  => ["$report\n"], Disposition => "inline", Encoding => "-suggest", Filename => "SpamAssassinReport.txt");


		$entity->parts([$report2]);
		$entity->head()->mime_attr('content-type' => 'multipart/mixed');
		$entity->head()->mime_attr('content-type.boundary' => '------------=_' . scalar(time) .  "-$$-nikc");
		$entity->add_part($container);
	        action_rebuild();
                # END REPORTSAFE


            } else {
                # Delete any existing X-Spam-Score header?
                action_delete_header("X-Spam-Score");
	    }
	}
    }

    # I HATE HTML MAIL!  If there's a multipart/alternative with both
    # text/plain and text/html parts, nuke the text/html.  Thanks for
    # wasting our disk space and bandwidth...

    # If you don't mind HTML mail, comment out the next line.
    #remove_redundant_html_parts($entity);

    # If your Server is an inbound gateway, this will be correct.  Otherwise, logic
    # to decide mail_in will need to be inserted.
    md_graphdefang_log('mail_in');
}

sub build_status_line {
  # Based on _build_status_line from Mail/SpamAssassin/PerMsgStatus.pm 
  # KAM 07-09-03  
  # Doesn't handle the spam/ham bayes determination as I don't think I can accurately
  # get that information
  my ($hits, $req, $names, $report) = @_;
  my $line;

  $line  = (($hits >= $req) ? "Yes, " : "No, ");
  $line .= sprintf("hits=%2.1f required=%2.1f\n", $hits, $req);

  if($_ = $names) {
    $Text::Wrap::columns   = 74;
    $Text::Wrap::huge      = 'overflow';
    $Text::Wrap::break     = '(?<=,)';
    $line .= Text::Wrap::wrap("\ttests=", "\t      ", $_) . "\n";
  } else {
    $line .= "\ttests=none\n";
  }

  $line .= "\t";

  $line .= "version=" . Mail::SpamAssassin::Version();

  return $line;
} 

# DO NOT delete the next line, or Perl will complain.
1;