#!/usr/bin/perl -w # Search (mail spool) files for things that could contain virus. # We only report things: no mail files are changed. # This script is not suitable for scanning executables; you could try # perl -pe 's/\x00//g' FILE.EXE | strings -a | checkvirus # for those (but even that is not recommended). # BEWARE: we specifically ALLOW .doc and similar files and matching # Content-Types to cut down on the noise... # Use something like # # checkvirus /var/spool/mail/* # # or maybe via the following script (checkmailspool): # ## #!/bin/ksh - ## ## # Search (mail spool) files for things that could contain virus; ## # mail to psz the new things we find. ## # ## # Run from crontab with line: ## # 55 4,13,18 * * * /usr/local/sbin/checkmailspool ## ## PATH=/sbin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin:/usr/users/system/root/bin ## export PATH ## ## CMD="${0##*/}" ## ## if [ $# = 0 ]; then ## CMP=true ## TMP=/users/system/root/tmp/$CMD ## # print "Searching files in /usr/spool/mail ..." ## # print ## cd /usr/spool/mail ## set -- * ## if [ -f $TMP.out ]; then ## mv $TMP.out $TMP.out.old ## else ## touch $TMP.out.old ## fi ## else ## CMP=false ## TMP=/users/system/root/tmp/$CMD-single ## fi ## ## ## /usr/local/bin/checkvirus "$@" > $TMP.out 2> $TMP.new ## ## ## if [ "$CMP" = true ]; then ## diff $TMP.out.old $TMP.out > $TMP.diff ## grep '^>' $TMP.diff | cat -v >> $TMP.new ## ## if [ -s $TMP.new ]; then ## { ## print ## print "Above are additions since last check by $CMD, full results in:" ## ls -l $TMP.out.old $TMP.out ## } >> $TMP.new ## mailx -s 'Viruses in mail files' psz < $TMP.new ## fi ## else ## cat $TMP.out ## fi # What about a man page? # Comments on design: # # We expect to "work" on mail files, i.e. human-readable text. # # We look at the file(s) line-by-line, without any "context" kept from line # to line. Thus miss things when spread over several lines. (Should we # attempt to keep $prevline, and re-check "$prevline$thisline"?). We do not # attempt to check that content-type matches file extension; do nothing about # encodings (e.g. compressed files) that would need knowledge of a block; # miss mis-matched filename and name in Bagle variants. We thus also miss # attacks like "missing filename" http://www.securityfocus.com/bid/13837. # # Known bugs/problems: # # A CR at end-of-line (or a FF at the beginning) could interfere with our # detections; might remove them, but have not seen them "in the wild". # # Do not handle continuation of header lines: e.g. Subject lines could be # spread over several lines (and cause buffer overflow) but we do not notice. # # We keep just enough context to recognize mail message headers (structured # as mail files), to aid in reporting. We also make a feeble attempt to # recognize Word document attachments, to cut down on the noise. use bytes; # See man pages perlunicode, utf8, bytes $debug = ($0 =~ m/debug/); print "Debug mode: extremely verbose...\n\n" if $debug; print "BEWARE: we allow DOC RTF XLS files (and matching Content-Types) ...\n"; print "Watch out for\n"; print "* EXE BAT COM and similar files, nasty JavaScript,\n"; print " funny Content-Types, some HTML tags :\n\n"; sub base64 { # Decodes argument my ($b) = @_; my ($u) = ''; my ($i, $l); $b =~ tr|A-Za-z0-9+/||cd; # remove non-base64 chars (including padding) $b =~ tr|A-Za-z0-9+/| -_|; # convert to uuencoded format $l = length($b); $b =~ s/.$//, $l-- if ($l % 4) == 1; # Break into lines of 60 encoded chars, prepending "M" or whatever. # For speed, keep all uuencoded strings and unpack in one go. $l -= 60; for ($i = 0; $i <= $l; $i += 60) { $u .= "M" . substr($b, $i, 60); } $b = substr($b, $i) if $i; # and any leftover chars if ($b ne "") { $u .= chr(32 + length($b)*3/4) . $b; } return unpack ("u", $u); } $| = 1; $prevARGV = ""; $prevtitl = ""; $wordfile = 0; $from = ""; $from2 = ""; $to = ""; $date = ""; $subj = ""; $head = 0; $spam = 0; $usydmail = 0; while (<>) { if ($debug) { print "Read $_"; print "\n" unless m/\n$/ } $prevARGV = "", $head = 0, next if $_ eq "\n"; if ($prevARGV ne $ARGV and m/^From (.*) \w{3} (\w{3} [ \d]?\d \d\d:\d\d):\d\d \d\d\d\d\n$/) { $from = $1; $from2 = ""; $to = ""; $date = $2; $subj = ""; $head = 1; $usydmail = 1; } if ($head) { if (m/^From: (.*)\n/i) { $x = $1; $from2 .= " ($x)" unless $from and ( $x eq $from or $x =~ m/\<\Q$from\E\>/ or $x =~ m/^\Q$from\E \(/ ); } if (m/^(?:To|X-smtpdoor-to|Orig-To): (.*)\n/i) { $x = $1; $to .= " (to $x)" unless ( $x eq $ARGV or $x =~ m/^\Q$ARGV\E\@/ or $x =~ m/\<\Q$ARGV\E\@/ or $to =~ m/ \(to \Q$x\E\)/ ); } if ((! $subj) and m/^Subject: (.*)\n/i) { $subj = $1; } if (m/^Received: from /) { # Want to flag internal (USyd-originated) mail $x = $_; $x =~ s/ by .*//; if ($x =~ m/[^\w\.](\d+\.\d+\.\d+\.\d+)([^\w\.]|$)/) { $x = $1; $usydmail = 0 unless $x =~ /^(129\.78|10\.|172\.|127\.0\.)/ and $x !~/^172.(1[3-9][0-9])/; } } # Skip some spam filter lines if (m/^(X-spam-sign|X-(x-)*smtpdoor-spamsign|X-(x-)*PerlMx-Spam): /) { $spam = 1; } elsif ($spam and m/^[\t ]/) { next; } else { $spam = 0; } } $prevARGV = $ARGV; # Careful: do not be recursive. # I wonder how to check sender/recipient when Jim will use this # within smtpdoor... next if m/^> [ \*-] \w+(-\w+)?:/ and $ARGV eq 'psz' and $usydmail and ( $from eq 'root@maths.usyd.edu.au' or ( $from eq 'daemon' and $from2 eq ' (root@maths.usyd.edu.au (system PRIVILEGED account))' ) ) and $subj =~ m/^Viruses in \w+ mail files$/; next if m/\*\*NASTY\*\*/ and ( $ARGV eq 'psz' ) and $usydmail and $from eq 'smap@maths.usyd.edu.au' and $from2 eq ''; next if m/^p\w+:(App|Sys)[lL]og(]+>)?()?()?:((Warning|Error):|:UPHClean - )/ and ( $ARGV eq 'mike' or $ARGV eq 'robertp' ) and $usydmail and $from eq 'mike@maths.usyd.edu.au' and $from2 eq '' and $subj =~ m/^(Lab \d+(\/\d+)?|Admin( PC)?|Staff( PC)?) night jobs notification$/; $p = ""; $c = ""; MATCH: { $p = "*Unprintable char", last MATCH if # But some (most) may be dangerous, see # http://www.securityfocus.com/archive/1/442904 # http://applesoup.googlepages.com/bypass_filter.txt m/[^\t\n\f\r\e -~\x81-\x9a\xa0-\xfe]/ and ! m/^\x04$/; $p = "*Mid-line break", last MATCH if m/[\n\x8a\x8d]./ or (m/\r.*[^\s]/ and !m/^[^\r]*\r=$/) or m/.\f/; $p = "*Escape sequence", last MATCH if m/\e/ and (m/\e([^\$\(]|$)/ or m/\e\$([^B]|$)/ or m/\e\(([^BJ]|$)/); # Allow Asian languages $p = "*Long header", last MATCH if $head and m/.{200}/ and ! m/^((Received|References|DKIM-Signature|DomainKey-Signature|X-\w[\w-]*\w): |[\t ]id \d+ for )/; $p = "*Long line", last MATCH if m/.{999}/; # Exception: nothing wrong with it "as is", # but might complain after uXX and utf7 decoding next if m/^ $/ or m/^ *name=(3D)?\w+-?\w+>\s*$/ or ( $t eq 'href' && $n =~ m/^"http:\/\/([a-z][a-z]\.rd|rd|pa)\.yahoo\.com\/(.*\/)?\*/ ) or ( $t eq 'href' && $n =~ m/^"http:\/\/chkpt\.zdnet\.com\/chkpt\/astr\w+\/$/ ) or ( $t eq 'HREF' && $n =~ m/^"http:\/\/weeklynews\.lastminute\.com\// and $n =~ m/\/Key/ ) or ( $t eq 'href' && $n =~ m/^"http:\/\/www\.sulekha\.com\// and $n =~ m/.asp\?cid/ ) or ( $t eq 'href' && $n =~ m/^"http:\/\/cgi\w*\.ebay\.com(\.au)?\/\w+-?\w+\/eBayISAPI.dll\?[^"]*(">|=$)/ ) or ( $t eq 'href' && $n =~ m/^"mailto:info\@asia\.cnet\.com ">/ ) or ( $t eq 'href' && $n =~ m/^"https?:\/\/www\.vintagecellars\.com\.au\/scripts\/(xworks\.exe|wc\.dll)\?/ ) or ( $t eq 'href' && $n =~ m/^"http:\/\/www\.ht\.com\.au\/scripts\/xworks\.exe\?/ ) or ( ($t eq 'href' or $t eq 'HREF' or $t eq 'img src') && $n =~ m/^"http:\/\/\w+\.(f2|fairfax)\.com\.au\/event\.ng\/Type=/ ) or ( $t eq 'href' && $n =~ m/^"http:\/\/www\.sswug\.org\/sitereg\.reg">/ ); next MATCH if ( $t eq 'href' && $n =~ m/^"mailto:pmx-auto-approve%\w+\@suphys.physics.usyd.edu.au\?subject=Release%20message%20from%20quarantine/ ); $q = ''; $x1 = ''; if ($t !~ m/^begin/i) { if ($n =~ s/^\"//) { $q = '"'; $q = '""', $x1 = $1 if $n =~ s/\"(.*)//; } elsif ($n =~ s/^\'//) { $q = "'"; $q = "''", $x1 = $1 if $n =~ s/\'(.*)//; } else { $n =~ s/([^\s])\s.*/$1/ unless m/^\s*(file)?name\s*=/; $n =~ s/\>.*// if m/;,]|=20|=$)/ and ! ($x1 =~ m/^-->/ and $x =~ m/(^|<)!--/); $p = "*Un-terminated quote", last MATCH if $q =~ m/^.$/ and ! ( $c =~ m/ --b64 / or ( $x =~ m/=$/i && ( $x =~ m/=3d/i or $c =~ m/ --uXX / ) ) or ( m/a href="http:\/\/newsletters.fairfax.com.au\// && m/.{900}/ && m/!$/ ) ); $p = "*IE URL obfuscation", last MATCH if $n =~ m/(%[01][0-9a-f]|\�*[01][0-9a-f][^0-9a-f]|\�*[12]?[0-9][^0-9]).*(\@|=40|%40|\�*40([^0-9a-f]|$)|\�*64([^0-9]|$))/i or $n =~ m/=[01][0-9a-f]([^0-9].*)?(\@|=40|%40|\�*40([^0-9a-f]|$)|\�*64([^0-9]|$))/i or $n =~ m/\?\@/; $p = "*IE download alert bypass", last MATCH if $n =~ m/\.\s*(\w{1,6})\s*\?\./i or $n =~ m/\.\s*(\w{1,6})\s*\?(%[01][0-9a-f]|\�*[01][0-9a-f][^0-9a-f]|\�*[12]?[0-9][^0-9]|=[01][0-9a-f])/i; if ($q and $t =~ m/^(href|img)/i and ( $n =~ m/^https?:/i or $n =~ m/^[^:]*\?/ ) ) { $n =~ s/\?.*//; } # Long names may cause buffer overflow $p = "*Long name", last MATCH if $n =~ m/.{100}/ and ( $t !~ m/^(href|img)/i or $n =~ m/.{200}/ ); next if $t =~ m/^boundary/i; # Only checked Eudora buffer overflow # Ignore common "nasties" $n =~ s/\[\d\]// if $t =~ m/^(name|filename)/i and $q eq '""' and $n =~ m/^\w+\[\d\]\.\w+$/; $n =~ s/ $// if $t =~ m/^(href|img)/i and $q eq '""' and $n =~ m/^[^ ]* $/; # Outlook may simply remove (skip) illegal characters in filenames e.g. virus.%vb*s # Eudora (for uuencoded blocks) skips all control characters (except CR or NL) # and any of "*/:<>?\| . $p = "*Bad name", last MATCH if $t =~ m/^(begin|name|filename|embed)/i and ! ( $t =~ m/^name/i and ( $x =~ m/(^|<)(input|select|textarea) /i or $x =~ m/(^|<)a name=/i ) ) and ( ( $q eq '""' and $n =~ m/[^\w\.\s\&=\(\)\'-]/i and $x !~ m/(^|<)a name=(3d)?\"#\w+\"(>|$)/i ) or ( $q ne '""' and $n =~ m/[^\w\.\s\&=-]/i ) or ( ! $q and $n =~ m/[^\w\.-]/i ) ); $p = "*IE long share name buffer overflow", last MATCH if $t !~ m/^(name|filename)/i and $n =~ m/^(file:)?[\\\/]*[0-9a-fx\.]+[\\\/]*$/i; $p = "*Remote DoS IE Memory Access Violation", last MATCH if $t !~ m/^(name|filename)/i and $n =~ m/^(file:)?[\\\/]*[^a-z][:|]/i; $p = "*Viral web server", last MATCH if $n =~ m/^http:\/\/\d+\.\d+\.\d+\.\d+:(\d+)\// and $1 > 82 and $1 < 8000; # mailto URLs in Outlook in pre-SP3 OfficeXP (==Office2002) $p = "*Embedded quote", last MATCH if $n =~ m/"|=22|%22|\�*22([^0-9a-f]|$)|\�*34([^0-9]|$)|\"/i; # Weird "URL protocol handlers" (about: or javascript:) $p = "*Bad URL protocol", last MATCH if $n =~ m/^(\w+|\w+-\w+):/ and ( $x1 = $1, 1 ) and $x1 !~ m/^(http|https|mailto|Mailto|ftp)$/ and ! ( $t =~ m/^img/i and $x1 =~ m/^cid$/ ) and ! ( $t =~ m/^(href|:src|background)/ and $q eq '""' and $n =~ m/^cid:\w[\w.-]+\w\@\w[\w.]+\w$/i ); # Any extension allowed, e.g. nul.txt is same NUL device. # References: # http://www.securityfocus.com/archive/1/193189 # http://www.securityfocus.com/archive/1/193306 # http://www.securityfocus.com/archive/1/195054 # http://support.microsoft.com/kb/256015 $p = "*DOS device name", last MATCH if $n =~ m/(^|[:\/\\])(\$MMXXXX0|AUX|CAS\d*|CLOCK\$|COM\d|CON|CONFIG\$|DBLBUFF\$|DBLSBIN\$|EMMXXXX0|HLP\$|IFS\$|IFS\$HLP\$|IPC\$|LPT\d|MAILSLOT|MS\$MOUSE|MSCD\d+|NUL|PIPE|PRN|SCSIMGR\$|SETVERXX|UNC|XMSXXXX0)($|[\.\/\\])/i; $p = "*CLSID extension", last MATCH if $n =~ m/\{[0-9a-f]{2}/i; $p = "*Dot name (dot bug)", last MATCH if #$n =~ m/\.\s*$/i and $c !~ m/ --b64 /; $n =~ m/[\.\s]$/i and $c !~ m/ --b64 /; $p = "*Dot-blank name (dot bug)", last MATCH if $n =~ m/\.\s/i; $p = "*Multiple blanks in name", last MATCH if $n =~ m/\s\s/i; $e = ""; $e = $1 if $n =~ m/\.\s*(\w{1,6})\s*$/i or ( ! $q and ( $n =~ m/^[^=]*\.\s*(\w{1,6})($|[^\w\/\.:])/i or $n =~ m/\.\s*(\w{1,6})($|[^\w\/\.:])/i ) ); $e = "" if $e and ($t =~ m/^img/i and $q eq '""' and $n =~ m/cid:\w[\w.-]+\w\@\w[\w.]+\w$/i) or # Careful with href: too many are of type "". ($t =~ m/^href/i and ( $n =~ m/^(http:\/\/|https:\/\/|mailto:|Mailto:|ftp:\/\/)[^\\\/]*$/ or $n =~ m/^(http:\\\\)[^\\\/]*$/ ) ); next unless $e; # Add ZIP and RAR, but not TAR or TGZ, to executables... # See also: # http://support.microsoft.com/kb/883260 $p = "*Executable extension", last MATCH if $e =~ m/^(ade|adp|ani|app|asd|asx|bas|bat|cer|chm|cmd|cnf|com|cpl|crt|csh|dll|do|eml|exe|folder|fxp|grp|hlp|hta|ica|inf|ins|isp|its|job|js|jse|ksh|lnk|ma.|md.|mhtml|mht|ms.|nws|ocx|ops|pcd|pif|prf|prg|pst|rar|rdp|reg|rm|scf|scr|sct|sh.|smi|uls|url|vb|vb.|vs.|vsmacros|vxd|wal|wm.|ws|ws.|xl|xnk|zip)$/i; $p = "*WinZip8 bug extension", last MATCH if $e =~ m/^(b64|bhx|hqx|mim|uu|uue|uu.|xxe)$/i; # LHA references: # http://lists.grok.org.uk/pipermail/full-disclosure/2004-April/020702.html # http://lists.grok.org.uk/pipermail/full-disclosure/2004-May/020776.html # http://lists.grok.org.uk/pipermail/full-disclosure/2004-May/020990.html $p = "*WinZip9 bug extension", last MATCH if $e =~ m/^lha$/i; $p = "*Acroread5.1 bug extension", last MATCH if $e =~ m/^xfdf$/i; $p = " Unrecognized extension" if ! ($e =~ m/^(doc|rtf|xls)$/i) and ### BEWARE ! ($e =~ m/^(asc|avi|bib|bmp|css|dat|data|dvi|eps|f90|gif|gz|htm|html|ics|jpeg|jpg|log|m|mpeg|mpg|p7m|p7s|pcx|pdf|png|pps|ps|shtml|sty|tex|tif|tiff|txt|vcf|xml)$/i) and # ! ($t =~ m/^(href|img|form)/i and $e =~ m/^(\d+|asp|aspx|cfm|cgi|dll|e|gsp|gw|jhtml|jsp|php|php3|pl)$/i) and ! ($t =~ m/^(href|img|form)/i) and ### BEWARE: Allow any as references??? ! ($usydmail and $from =~ m/^(\w\.)?\w+\@maths\.usyd\.edu\.au$/ and ! $from2); # Allow funny named files internally if ($e =~ m/^doc$/i and m/\sfilename=".*doc"$/i) { $wordfile = 2; print "wordfile on: $_\n" if $debug; # Quite often we will miss Word files: because the filename line # ends with semicolon, or there are more MIME headers following, # or because the name was encoded in some way. # We only use this to ignore "\WINDOWS" strings (that often # appear as where the file was saved). We know nothing about # NORMAL.DOT files... } } } last MATCH if $_ eq "ssid=\"clsid:38481807-CA0E-42D2-BF39-B33AF135CC4D\" id=ieooui>\n" and $from eq 'C.Cheen@maths.usyd.edu.au' || $from2 =~ m/C.Cheen\@maths.usyd.edu.au/; last MATCH if $from =~ m/\@newsletters.fairfax.com.au/ and ( m/^\s*var container = document.getElementById\(sContainer\);\s*$/ || m/^\s*parent.document.getElementById\(sFrame\).style.(width|height) = (width|height|iX|iY) \+ "px";\s*$/ || m/\sdocument.domain = domain;\s*$/ ); # Things found in HTML $p = "*HTML construct", last MATCH if m/type=(3d)?\"?password\"?\s*value=(3d)?[^>]{10}/i or # Netscape pre-4.76:
( m/script\s*language[^\w]/i and ! m/