#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
use Cwd;
use File::Path qw(make_path);
use File::Find;
use Term::ANSIColor; # lol
use Digest::SHA qw(sha1 sha1_hex);
use sigtrap qw/handler signal_cleanup normal-signals/;
#use re 'debug';

# constants (let's pretend anyway)
my $default_bytes = 500000;   # bytes read per file when -b is not given
my $fc_version = 34;          # embedded in the cache file name

# Option variables populated by Getopt::Std::getopts().
our ($opt_t, $opt_a, $opt_b, $opt_p, $opt_S, $opt_d, $opt_o, $opt_D, $opt_h,
     $opt_u, $opt_N, $opt_e, $opt_r, $opt_c, $opt_q);

# Print the usage summary to STDERR and exit with status 1.
# Called explicitly for -h and for missing scan targets; Getopt::Std also
# looks for a sub with this name when handling --help.
sub main::HELP_MESSAGE {
    # One [flag, description] pair per option, printed as " flag<TAB>description".
    # The -a, -b and -e entries had lost their argument placeholders (leaving
    # e.g. "names ending in . Workaround"); they are restored here.
    my @usage = (
        [ '-t', 'ticket number for output dir' ],
        # TODO: Use the cPanel API to grab real docroots?
        [ '-a <accts>', 'account list, comma-delimited. Will search only public_html' ],
        [ '-b <bytes>', "Number of bytes per file to scan. Default is $default_bytes" ],
        [ '-p', 'restrict searches to *.php (faster but may miss stuff)' ],
        #[ '-s', 'check for potentially evil symlinks (target owner different from link owner)' ],
        [ '-S', 'Skip checking symlinks' ],
        [ '-d', 'grep for defacements' ],
        [ '-o', 'other directories to search, independently of -a docroots. May be needed for addon/subdomains' ],
        [ '-u', 'user homedir prefix (default /home)' ],
        [ '-D', 'Debug mode. Output a more detailed log which identifies signature matches.' ],
        [ '-N', 'Show files which do NOT match on stderr (debug feature only)' ],
        [ '-e <ext>', 'exclude files with names ending in <ext>. Workaround if scan hangs on js' ],
        [ '-r', 'regex debugging' ],
        [ '-c', 'use cache' ],
        [ '-q', 'quiet' ],
        [ '-h', 'print this help message and quit' ],
    );

    print STDERR "Usage:\n";
    for my $entry (@usage) {
        my ($flag, $description) = @{$entry};
        print STDERR " $flag\t$description\n";
    }
    exit 1;
}

# malware signatures. Needs cleanup, maybe an init function.
# List of compiled signature regexes, in the order they are pushed.
# Most entries in this section spell a PHP identifier letter-by-letter, each
# letter being the literal character, its "\xNN" hex escape, or its "\NNN"
# octal escape as it would appear in obfuscated PHP source.  The leading
# negative lookahead keeps the plain, unobfuscated spelling from matching.
my @sigs;
#push @sigs, "foo";

# heuristic matches:
# TODO: write a script to generate these and pipe it the PHP core API
#push @sigs, '(?!GLOBALS)(G|\\x47|\\?046)(L|\\x4[Cc]|\\107)(O|\\x4[Ff]|\\?041)(B|\\x42|\\117)(A|\\x41|\\?046)(L|\\x4[Cc]|\\101)(S|\\x53|\\?0?05).*';
# NOTE(review): the last group below uses octal \101, which is 'A'; octal for
# 'S' (hex 53) is \123.  Looks like a copy/paste slip -- confirm before fixing,
# and invalidate the cache when signatures change.
push @sigs, qr{(?!GLOBALS)(G|\\x47|\\107)(L|\\x4[Cc]|\\114)(O|\\x4[Ff]|\\117)(B|\\x42|\\102)(A|\\x41|\\101)(L|\\x4[Cc]|\\114)(S|\\x53|\\101).*};
push @sigs, qr{(?!eval)(e|\\x65|\\145)(v|\\x76|\\166)(a|\\x61|\\141)(l|\\x6[Cc]|\\154)};
# eval / assert / preg_replace(...//e), each optionally split by "." string
# concatenation, followed by a request superglobal (_GET, _POST, _REQUEST, _COOKIE).
push @sigs, qr{\W(e("\.")?v("\.")?a("\.")?l|a("\.")?s("\.")?s("\.")?e("\.")?r("\.")?t|p("\.")?r("\.")?e("\.")?g("\.")?_("\.")?r("\.")?e("\.")?p("\.")?l("\.")?a("\.")?c("\.")?e.*//e)\W+.*\$\{?"?(_("\.")?G("\.")?E("\.")?T|_("\.")?P("\.")?O("\.")?S("\.")?T|_("\.")?R("\.")?E("\.")?Q("\.")?U("\.")?E("\.")?S("\.")?T|_("\.")?C("\.")?O("\.")?O("\.")?K("\.")?I("\.")?E)}i;
# A request value called as a function with another value from the same superglobal.
push @sigs, qr{\$_(COOKIE|GET|POST|REQUEST)\["\w+"\]\(\$_\g1\["\w+"\]}i; # 569524b977bf8665e1650510eb23fd9e
#push @sigs, qr{(?!eval)(e|\\x65|\\145|\s*(''|"")\s*.\s*chr\s*\(101\)\s*.\s*(''|""))(v|\\x76|\\166|s*(''|"")\s*.\s*chr\s*\(118\)\s*.\s*(''|""))(a|\\x61|\\141|s*(''|"")\s*.\s*chr\s*\(97\)\s*.\s*(''|""))(l|\\x6[Cc]|\\154|s*(''|"")\s*.\s*chr\s*\(108\)\s*.\s*(''|""))};
push @sigs, qr{(?!COOKIE)(C|\\x43|\\103)(O|\\x4[Ff]|\\117)(O|\\x4[Ff]|\\117)(K|\\x4[Bb]|\\113)(I|\\x49|\\111)(E|\\x45|\\105)};
push @sigs, qr{(?!base64_decode)(b|\\x62|\\142)(a|\\x61|\\141)(s|\\x73|\\163)(e|\\x65|\\145)(6|\\x36|\\?066)(4|\\x34|\\?064)(_|\\x5[Ff]|\\137)(d|\\x64|\\144)(e|\\x65|\\145)(c|\\x63|\\143)(o|\\x6[Ff]|\\157)(d|\\x64|\\144)(e|\\x65|\\145)};
# one for encode?
# Obfuscated "base64_" prefix (covers both _decode and _encode spellings).
push @sigs, qr{(?!base64_decode)(b|\\x62|\\142)(a|\\x61|\\141)(s|\\x73|\\163)(e|\\x65|\\145)(6|\\x36|\\?066)(4|\\x34|\\?064)(_|\\x5[Ff]|\\137)};
# The next two entries start with "(?!)", an empty negative lookahead that can
# never succeed, so they are inert placeholders for the TODO items.
# TODO str_rot13
push @sigs, qr{(?!)(C|\\x43|\\103)(O|\\x4[Ff]|\\117)(O|\\x4[Ff]|\\117)(K|\\x4[Bb]|\\113)(I|\\x49|\\111)(E|\\x45|\\105)};
# TODO: gzuncompress
push @sigs, qr{(?!)(C|\\x43|\\103)(O|\\x4[Ff]|\\117)(O|\\x4[Ff]|\\117)(K|\\x4[Bb]|\\113)(I|\\x49|\\111)(E|\\x45|\\105)};
# TODO: PHP_SELF
# NOTE(review): the body below still spells COOKIE, not PHP_SELF -- placeholder
# that currently behaves like a COOKIE signature; confirm intent.
push @sigs, qr{(?!PHP_SELF)(C|\\x43|\\103)(O|\\x4[Ff]|\\117)(O|\\x4[Ff]|\\117)(K|\\x4[Bb]|\\113)(I|\\x49|\\111)(E|\\x45|\\105)};
# obfuscated inclusions, see 37d336cfacdd043a6e54277a8870833a
push @sigs, qr{(include|require).*(?!/home/)(/|\\x2F|\\x2f|\\057)(h|\\x68|\\150)(o|\\x6f|\\x6F|\\157)(m|\\x6d|\\x6D|\\155)(e|\\x65|\\145)(/|\\x2F|\\057)};
# TODO: one for error_ like above
# TODO: one for ini_set like above.
# TODO: Consider adding chr() to the above
# Need one for _COOKIE as well
# TODO: found these in the wild obfuscated as above: base64_decode str_rot13 gzuncompress
push @sigs, qr{eval\s*\(stripslash};
push @sigs, qr{eval\s*\(base64_decode\s*\(};
push @sigs, qr{eval\(gzuncompress\s*\(base64_decode};
# consider if I really want to get in the business of tracking every false positive in the regex
# NOTE(review): "(!IGlm...)" is a plain capture group starting with a literal
# "!", not a "(?!...)" lookahead -- presumably a lookahead was intended; verify.
push @sigs, qr{eval\s*\(base64_decode\s*\((!IGlmKCEkY292ZXJ0c3RvcmVidWlsZGVyX29wdGlvbnMtPmlzX2xpY2V)};
# avoid false positives from Smarty function.mailto, SpiffyCal
# TODO: We could check for a script tag. But this will make us
# miss potentially malicious JS.
push @sigs, qr{eval\s*\(unescape(?!.*\$js_encode)(?!\(this.JS)}; # avoid false positives from Smarty function.mailto
push @sigs, qr{eval\s*\(gzinflate\s*\(base64_decode};
push @sigs, qr{eval\s*\(gzinflate\s*\(str_rot13\s*\(base64_};
# high potential for false positives on this one
push @sigs, qr{eval\s*\(\s*base64_decode\s*\(.*\?\>};
push @sigs, qr{eval\("\?\>"\.base64_decode\(};
# lookup tables?
# TODO: not adequate. Need to match each class member at least once in any order. Not sure if possible with a regex, perhaps
# with a mess of lookahead/lookbehind.
#push @sigs, qr/\s*\$\w+\s*=\s*[\'\.ejupfa6wvgnoqdyhstxbirkzc\%4_lm]{30,}\s*;/; # from 241d8d4ac363de8bea2cbc8cccf2cc3f sample (cutwail?).
# next expected iterations of CryptoPHP: expect some false positives maybe:
# we can tighten this up with quotes, end paren, semicolon if need be.
#push @sigs, qr{include\s*\(.*\.(?i)(png|jpg|jpeg|gif|svg|bmp|pdf)(?-i)};
push @sigs, qr{include\s*\(\s*['"][\w/]*(?i)(png|jpg|jpeg|gif|svg|bmp|pdf)(?-i)['"]\s*\)};
push @sigs, qr{base'\.\(32\*2\)\.'_de'\.'code};
push @sigs, qr{base'\.\(2\*32\)\.'_de'\.'code}; # ugh
push @sigs, qr{base'\.\(16\*4\)\.'_de'\.'code}; # LOL
push @sigs, qr{base'\.\(4\*16\)\.'_de'\.'code}; # I can do this all day you know.
# symlink bombs hopefully
push @sigs, qr{symlink.*home.*public_html.*config\.php};
# plaintext or unobfuscated PHP stuff
push @sigs, qr{F(['"]\s*\.\s*['"])?i(['"]\s*\.\s*['"])?l(['"]\s*\.\s*['"])?e(['"]\s*\.\s*['"])?s(['"]\s*\.\s*['"])?M(['"]\s*\.\s*['"])?a(['"]\s*\.\s*['"])?n(['"]\s*\.\s*['"])?(?!age)}; #FilesMan with concat obfuscation
push @sigs, qr{FilesMan(?!age)}; # TODO: remove when above is tested
push @sigs, qr{DirectoryIndex\s*Sux.htm}; # found in .htaccess made by webshells and symlink bombers.
push @sigs, qr{DirectoryIndex\s*cp\.html}; # another symlink bomb .htaccess
push @sigs, qr{DirectoryIndex\s*z0mbie.htm}; # haven't seen in the wild yet, but associated with symlink bombs.
push @sigs, qr{webshell.*[o0]rb}i; # web shell by Orb and a million derivatives
# NOTE(review): the statement below appears truncated -- "qr{(?" is never closed
# before the next "push" begins, so text was presumably lost from this copy of
# the file.  Restore it from the original source; do not guess the pattern.
push @sigs, qr{(? push @sigs, qr{array = array.*;function x\(\$string\)\{\$b64 = "\\x62\\x61\\x73\\x65\\x36\\x34\\x5f\\x64\\x65\\x63\\x6f\\x64\\x65";\$r13 = "\\x73\\x74\\x72\\x5f\\x72\\x6f\\x74\\x31\\x33";\$gzc = "\\x67\\x7a\\x75\\x6e\\x63\\x6f\\x6d\\x70\\x72\\x65\\x73\\x73";return "" . \$gzc\(\$b64\(\$r13\(\$string\)\)\);\}eval\(x\(\$x\)\);};
push @sigs, qr{if\(\$_GET\['mode'\]=='.*'\)\{echo'\{.*" value="'\.\$_GET\['.*'\]\.'"\}';die\(\);\}}; # russian pharma redirect
push @sigs, qr{Auto SQL Injection =D};
# NOTE(review): the statement below also looks truncated ("\$i\ push @sigs, qr{"
# sits inside an unfinished pattern) -- presumably the same kind of text loss;
# restore from the original source.
push @sigs, qr{\$\w\w\w\w\w\w=gzinflate\(base64_decode\(\$\w\w\w\w\w\w\)\); for\(\$i=0;\$i\ push @sigs, qr{\<\?php\ \$n\ \=\ (!\$m\%3;)(!0; \$postslist)}; # TODO: false positive
push @sigs, qr{preg_r5c%x7825hOh};
push @sigs, qr{^\<\?\$\w+.*\=.+}; # *** TODO: still prone to false positives on gzipped data
#push @sigs, qr{\Q';};
push @sigs, qr{if\(move_uploaded_file\(\$_FILES\[\$uploaded\]\[\$tmp_name\], \$target_path\)\) \{echo \$uploaded;\}\}\}};
push @sigs, qr{fwrite\(\$II11II11II11II11};
push @sigs, qr{\$\w\w\w\w = \$\w\w\w\w\('', \$\w\w\w\w\(\$\w\w\w\w\("\w\w", "", \$\w\w\w\w.\$\w\w\w\w.\$\w\w\w\w\.\$\w\w\w\w\)\)\); \$\w\w\w\w\(\);}; # found alongside binary malware
push @sigs, qr{\$data = json_decode\(base64_decode\(str_replace\(' ', '\+', \$_POST\['data'\]\)\), true\);}; # spammer, may produce false positives
push @sigs, qr{LD_PRELOAD=\./libworker\.so};
push @sigs, qr{if\(array_keys\(\$_GET\)\[-1\] == '\w\w\w\w\w'\)\{}; # upload shell
push @sigs, qr{if\(\!\$whoami\)\$whoami=exec\("whoami"\)}; # "injector" shell
push @sigs, qr{Simple PHP Injection - \*nix & \*BSD OnLy}; # same as above, in case of false positives
push @sigs, qr{\$\w+ = mail\(stripslashes\(\$\w+\), stripslashes\(\$\w+\), stripslashes\(\$\w+\)\);};
push @sigs, qr{\@error_reporting\(0\);\@ini_set\('error_log',NULL\);\@ini_set\('log_errors',0\);\@ini_set\('html_errors',0\);\@ini_set\('max_execution_time',0\);\@ini_set\('output_buffering',0\);\@ini_set\('display_errors', 0\);\@ini_set\('file_uploads',1\)};
push @sigs, qr{Simple SOCKS5 Server for Perl}; # perl SOCKS5 impl. not really malicious, but no place on a webserver
push @sigs, qr{socks5\.so/snew\.tar}; # dropper for perl SOCKS above
push @sigs, qr{http://ssspl\.svn\.sourceforge\.net/viewvc/ssspl/sss\.pl}; # dropper for perl SOCKS above
push @sigs, qr{http://socks5\.so/checksocks\.php}; # dropper for perl SOCKS above
push @sigs, qr{_SERVER.*DOCUMENT_ROOT.*index.php.*function.*is_writable.*\@file_get_contents.*if\(\!preg_match.*error_reporting};
push @sigs, qr{if\(isset.*GET.*\)\)\{echo.*\[uname\]"\.php_uname\(\).*\@ini_get.*disable_functions.*DisablePHP.*ini_get.*disable_functions};
push @sigs, qr{Source code obfuscated by Code Eclipse}; # old obfuscator, may produce false positives. Keep an eye on this one.
push @sigs, qr{http://javaterm.com/php.txt}; # "rar.php"
# TODO TODO TODO TODO BAD SIG
push @sigs, qr{if\s+\(\s*isset\s*\(\$_(GET|POST)\[(\"\w+\")\]\)\s*\)\s*\{\s*(\$\w+)\s*\=\s*\$_\g1\[\g2\]\s*;\s*(\$\w+)\s*\=\s*fopen\(\s*\g3,\"r\"\s*\)\s*;\s*(\$\w+)\s*\=\s*\"\"\s*;\s*while\s*\(\!feof\s*\(\s*\g4\s*\)\s*\)\s\{\s*\g5\s*\.\=\s*fread\s*\(\g4}; # file reader, reads, outputs a file named in GET input
push @sigs, qr{function smtpmail\(\$host, \$port, \$smtp_login, \$smtp_passw, \$mail_to, \$message, \$SEND\) \{}; # spammer, potential for false positives
# TODO: need a more generic form of the following for the heuristic sigs
push @sigs, qr{.e.\..v.\..a.\..l\(b.\..a.\..s.\..e.\..6.\..4_d.\..e.\..c.\..o.\..d.\..e};
# if (!isset($indf8e7ff5a)) { $indf8e7ff5a = TRUE;assert("e"."v"."a"."l(b"."a"."s"."e"."6"."4_d"."e"."c"."o"."d"."e('ICRHTE9CQUxTWyd
push @sigs, qr{if \(\!isset\(\$\w\w\w\w\w\w\w\w\w\w\w\)\) \{ \$indf8e7ff5a = TRUE;assert\("e"\."v"\."a"\."l\(b"\."a"\."s"\."e"\."6"\."4_d"\."e"\."c"\."o"\."d"\."e\(}; # "rar.php"
push @sigs, qr{\@copy\(\$_FILES\[file\]\[tmp_name\], \$_FILES\[file\]\[name\]\); exit;};
push @sigs, qr{SMTP CLOSED AND ATTEMPTS TO RECONNECT NEW CONNECTION SEASON};
push @sigs, qr{PHP Shell by};
push @sigs, qr{Nome do Servidor: <\?php echo \$UNAME = \@php_uname\(\);};
push @sigs, qr{GR5yYXp3YH17ejRne3h9cGdgdWBxPDB5dX9xYWQ9NG8ZHjQ0NDQweHt4NCk0MzMvGR40NDQ0cntmPDB9KSQvMH00KDRnYGZ4cXo8MHl1f3FhZD0vMH0};
push @sigs, qr{\$O00OO0=urldecode\(".*\);\$O00O0O=\$O00OO0\{.*\}\.\$O00OO0\{.*\}\.\$O00OO0\{.*\}.\$O00OO0\{.*\};\$O0OO00=\$O00OO0\{.*\}\.\$O00OO0\{.*\}\.\$O00OO0\{.*\}};
push @sigs, qr{\$\w+\s+\=\s+"";\s+\$\w+\s+\=\s+realpath\(""\)\."/";\s*\$\w+\s+\=\s+"\w+\.php";\s*if\(\!empty\(\$_POST}; # a weird roundabout eval-POST backdoor
push @sigs, qr{for\(\$i=0; \$i"\.gzuncompress\(base64_decode\(};
push @sigs, qr{cyber173_decode};
push @sigs, qr{file_put_contents\(.*php.*base64_decode\(.*echo\s+file_get_contents\(.*php.*\)};
push @sigs, qr{php error_reporting\(0\); if \(!defined\('WP_OPTION_KEY'\)\) \{ function \w+\(\) \{ define\('WP_OPTION_KEY','wp_data_newa'\); new}; # CryptoPHP social.png variant
push @sigs, qr{if \( 1 == 1\) \{}; # "Aria cPanel cracker" as encoded and bundled with Hitlar's WP plugin
push @sigs, qr{ndkzipfiles}; # download shell
push @sigs, qr{if \(\(isset\(\$_GET\['step'\]\)\)\&\&\(\!empty\(\$_GET\['step'\]\)\)\) \$step=\$_GET\['step'\]; else \$step=0;}; # download shell
push @sigs, qr{\Q..:::aKpuMPiN::::..}; # a pumpkin, apparently. (spam)
push @sigs, qr{GetSpamTOol}i; # http://getspamtool.com/
# $tmp expands to 33 consecutive "\w" atoms inside the next pattern.
my $tmp = '\w' x 33;
push @sigs, qr{<\?php if\(isset\(\$_GET\[\w\w\w\w\w\w\]\)\)\s*\{\$$tmp="}; # yet another encoded FilesMan
push @sigs, qr{if\(\!empty\(\$_SERVER\['HTTP_USER_AGENT'\]\)\) \{ \$\w\w\w\w\w\w\w\w\w \= array\("Google", "Slurp", "MSNBot", "ia_archiver", "Yandex", "Rambler", "StackRambler"\); if\(preg_match\('\/' \. implode\('\|', \$\w\w\w\w\w\w\w\w\w\) \. '\/i', \@\$_SERVER\['HTTP_USER_AGENT'\]\)\) \{ header\('HTTP/1\.0 404 Not Found'\); exit; \} \} \@ini_set\('error_log'}; # upload shell hiding from search crawlers
push @sigs, qr{function\s+getContent\(\$host,\s*\$path,\s*\$template,\s*\$pathToDor\)}; # some sort of cookie-based user tracker or caching proxy found on compromised site
push @sigs, qr{un1xbold\s.*edition}; # "private mailer" spam kiddies
push @sigs, qr{Data Cha0s Connect Back Backdoor\\n\\n}; # perl connectback shell
push @sigs, qr{socket\(SERVER, PF_INET, SOCK_STREAM, \$proto\) \|\| die \("Socket Error\\n"\);}; # perl connectback shell, watch for false positives
# what appears to be a DALnet-based botnet running as "httpd". Incorporates eggdrop and includes TCL scripts
push @sigs, qr{yang harus digunakan nama confilenya adalah djcrew};
push @sigs, qr{XHide - Process Faker}; # they cobbled it together from existing tools, Schizoprenic Xnuxer Research (c) 2002
push @sigs, qr{i\./\w\w -s "/usr/local/apache/sbin/httpd -DSSL" \./httpd -m}; # process faker launcher script
push @sigs, qr{Enj0y y0uR d00r}; # socket listener binary for control
push @sigs, qr{candayotelnet}; # socket listener binary for control
push @sigs, qr{You are a master\. Many many more commands are}; # not really malware, just eggdrop, but it's a red flag on a web server and against many AUP's
push @sigs, qr{This trick is borrowed from Tothwolf's Wolfpack}; # not really malware, just eggdrop, but it's a red flag on a web server and against many AUP's
push @sigs, qr{Dynamic Channel File for starts}; # not really malware, just eggdrop, but it's a red flag on a web server and against many AUP's
# end what appears to be an IRC-based botnet running as "httpd"
# some sort of spam site redirecting landing page. Thousands of scripts in each installation.
# end some sort of spam site redirecting landing page. Thousands of scripts in each installation.
push @sigs, qr{\<\?php \$user_agent_to_filter = array\( '\#Ask\\s\*Jeeves\#i', '\#HP\\s\*Web\\s\*PrintSmart\#i', '\#HTTrack\#i', '\#IDBot\#i', '\#Indy\\s\*Library\#',}; # payload / landing page
push @sigs, qr{\$redirect = str_replace\("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "\$keyword", \$redirect\);}; # single creator script?
push @sigs, qr{"usha sex boobs images free"}; # used in content templates?
push @sigs, qr{ass"\."ert"; \$\w\(\$\{"_PO"\."ST}; # POST backdoor hidden in an assert
push @sigs, qr{\#p\@\$c\@\#}; # googledork on an upload shell
push @sigs, qr{echo "publish success"}; # upload shell
push @sigs, qr{\$\w\w\w\="\`xZ\\"6zl>H,\@i0B\\\^\\rgN'RGb\!dFjCyw\}5\~7\\nO8s-JuDhX\(Up43oV\\\\\]\+t\|\%\\t\?_frI\{SM}; # spammer of some sort
push @sigs, qr{Data Cha0s Connect Back Backdoor}; # dc.pl
push @sigs, qr{HELLION PROUDLY PRESENTS}; # Autokiller GPL-licensed phishing scripts
push @sigs, qr{chr\(\( ord\(\$l___l_\[\$l____l_\]\)-ord\(\$l__l_\[\$l____l_\]\)\)\%256}; # 19d64898dbcedd1887677f184511a5b3 stealrat?
push @sigs, qr{ashcoike_surl}; # KingDefacer
push @sigs, qr{KingDefacer}; # KingDefacer
push @sigs, qr{\$shver \= \"}; # KingDefacer
push @sigs, qr{\w+\=\(ord\(\$\w+\[\d\]\)\<\<8\)\+ord};
push @sigs, qr{\$s_pass = "\w{32}";}; # a shell I guess
push @sigs, qr{\\w+\(\$\w+\[\$this->\w+\(\)\]\)\;}; # a9f36586f270eaceeb8c0653cd46d047 HTTP header backdoor
push @sigs, qr{isset\s*\(\$_POST(\['\w+'\])\)\s*\?\s*\$_POST\g1\s+:\s*\(isset\(\$_COOKIE\g1\)\s*\?\s*\$_COOKIE\g1\s*:\s*NULL\)}; # 7f3982e882511e24ec9e9a65a9872e3e
push @sigs, qr/(\s*\.\s*\$\w+\[\d+\]){30,}/; # lookup table, 5b5ac71f40c65eb6fcc8493c06a2d852
push @sigs, qr/\.\/\.ips1\.txt/; # 88ec6d213c2b8d371ed3e3d01e495524
push @sigs, qr{\*\/\@include\( dirname\( __FILE__ \) \. \'\/wp-includes\/js\/utilities\.js\' \);}; # include trying to hide as annotation
# end malware signatures

# globals
my $info_bytes = 0;
my $info_scarylinks = 0;   # symlinks flagged by match_link()
my $info_regexhits = 0;    # files flagged by match_file()
my $scan_symlink;
my $scan_phponly; # TODO: expand this to include *.pl, .htaccess, maybe *.html?
my $scan_exclude;
my $scan_bytes;
my $scan_debug;
my $scan_outfile;
my $scan_linkfile;
my $scan_debugfile;
my $scan_nonmatches;
my $scan_redebug;
my $scan_usecache;
my $scan_tick;
my $scan_cachefile;
my %cache;                 # sha1 of scanned bytes => 'clean' or the matching signature
my $SCANOUT;
my $DEBUGOUT;
my $LINKOUT;

# Append $text to the log at $path using the handle stored in $$fh_ref.
# The handle is opened in append mode and made unbuffered on first use, so
# log files are only created once something is actually written to them.
sub _log_append {
    my ($fh_ref, $path, $text) = @_;
    if (!${$fh_ref}) {
        open my $fh, '>>', $path or die "can't open $path for append: $!\n";
        my $previous = select $fh;
        $| = 1;
        select $previous;
        ${$fh_ref} = $fh;
    }
    print { ${$fh_ref} } $text;
    return;
}

# Write one message to the scan log (always enabled).
sub scanout {
    my $out = shift;
    _log_append(\$SCANOUT, $scan_outfile, $out);
    return;
}

# Write one message to the debug log; no-op unless debug mode (-D) is on.
sub debugout {
    my $out = shift;
    return unless $scan_debug;
    _log_append(\$DEBUGOUT, $scan_debugfile, $out);
    return;
}

# want a separate log for symlinks since they're handled differently.
# No-op when the symlink scan is disabled.
sub linkout {
    my $out = shift;
    return unless $scan_symlink;
    _log_append(\$LINKOUT, $scan_linkfile, $out);
    return;
}

# Rewrite the cache file from %cache, one "digest NUL verdict" record per line.
sub dumpcache {
    debugout "dumping cache...\n";
    open my $cache_out, '>', $scan_cachefile
        or die "can't write cache file $scan_cachefile: $!\n";
    while (my ($digest, $verdict) = each %cache) {
        $verdict = '' unless defined $verdict;
        print {$cache_out} "$digest\000$verdict\n";
    }
    close $cache_out or die "can't finish writing $scan_cachefile: $!\n";
    debugout "done dumping cache.\n";
    return;
}

# Populate %cache from the cache file.  A missing or unreadable cache file is
# not an error (e.g. first run); the sub simply returns.
sub loadcache {
    debugout "loading cache...\n";
    open my $cache_in, '<', $scan_cachefile or return;
    while (my $record = <$cache_in>) {
        chomp $record;
        # Limit of 2 so a verdict is never cut short at an embedded NUL.
        my ($digest, $verdict) = split /\000/, $record, 2;
        progress_tick();
        next unless defined $digest && length $digest;
        $cache{$digest} = defined $verdict ? $verdict : '';
    }
    close $cache_in;
    print "done loading cache.\n";
    return;
}

# Print the whole cache to stdout (debugging aid).
sub debugcache {
    print "DEBUG_CACHE begin\n";
    my $count = keys %cache;
    print "Keys: $count\n";
    for my $key (keys %cache) {
        print "DEBUG_CACHE: $key === $cache{$key}\n";
    }
    print "DEBUG_CACHE end\n";
    return;
}

# Handler installed through "use sigtrap" for the normal signals: save what
# has been cached so far, then die.
sub signal_cleanup {
    debugout "Signal caught, cleanup time!\n";
    # Only dump when caching is on and the cache path has been computed; a
    # signal can arrive before initialization has finished.
    dumpcache() if $scan_usecache && defined $scan_cachefile;
    die "Caught signal, quitting!\n";
}

# callback and its children for File::Find API

# File::Find callback.  $_ is the current entry's name.  Symlinks go to
# match_link() (when the symlink scan is enabled) and are never content
# scanned; regular files go to match_file() subject to the *.php restriction
# and the -e suffix exclusion.
sub wanted {
    my @st = lstat $_;     # also primes the "_" stat cache used below
    return unless @st;

    if (-l _) {
        match_link($_) if $scan_symlink;
        return;
    }
    return unless -f _;
    return if $scan_phponly && !/php\z/si;
    # -e was documented but its check was commented out; apply it as a literal,
    # case-insensitive name suffix.
    return if defined $scan_exclude
           && length $scan_exclude
           && /\Q$scan_exclude\E\z/si;
    #&& ((int(((-s _) + 511) / 512) <= 52428800) || $scan_bytes)
    match_file($_);
    return;
}

# Return the first signature in @sigs matching $buf, or undef if none does.
# Loop over malware signatures one by one: probably less efficient than a
# combined alternation regex with | but easier to maintain the signature list.
sub _first_matching_sig {
    my ($buf) = @_;
    for my $sig (@sigs) {
        if ($scan_redebug) { print "for loop iteration beginning. Matching $sig\n"; }
        # TODO: check whitelist sigs here.
        # TODO: a whitelist match should only negate a given sig, since
        # a common false-positive could still be backdoored with something
        # else whose signature we haven't matched yet
        return $sig if $buf =~ $sig;
    }
    return;
}

# Record and log a signature hit for the file File::Find is currently visiting.
sub _report_hit {
    my ($sig) = @_;
    $info_regexhits++;
    if ($scan_debug) { print "\b$File::Find::name matched $sig\n"; }
    scanout "$File::Find::name\n";
    debugout "$File::Find::name matched $sig\n";
    return;
}

# Scan the first $scan_bytes bytes of $file against @sigs.
# Results are cached by SHA-1 of the bytes read, so identical content is only
# matched once.  Returns 1 when the file is flagged, 0 otherwise (including
# when the file cannot be opened or read; that is noted in the debug log).
sub match_file {
    my $file = shift;
    progress_tick();

    my $fh;
    if (!open $fh, '<', $file) {
        debugout "$File::Find::name: can't open: $!\n";
        return 0;
    }
    binmode $fh;
    my $buf = '';
    my $got = read $fh, $buf, $scan_bytes;
    my $read_error = $!;
    close $fh;
    if (!defined $got) {
        debugout "$File::Find::name: read failed: $read_error\n";
        return 0;
    }

    # TODO: key on digest, "clean" for clean, or sig if dirty
    my $digest = sha1_hex($buf);
    my $verdict = $cache{$digest};

    if (!defined $verdict || $verdict eq '') {    # unseen file contents
        if (!length $buf) {
            # Nothing was read, so there is nothing to match; leave the entry
            # unresolved, as before.
            $cache{$digest} = '';
            return 0;
        }
        # The buffer is matched as one string, not line by line: the earlier
        # code read it back with $/ undefined, which yields a single record.
        # TODO: outer loop here for transforms (remove comments, etc).
        my $sig = _first_matching_sig($buf);
        if (defined $sig) {
            $cache{$digest} = $sig;
            _report_hit($sig);
            return 1;
        }
        $cache{$digest} = "clean";
        if ($scan_nonmatches) {
            # TODO: figure out if this is going in scan log, debug log, or what.
            print STDERR "\b$File::Find::name DID NOT MATCH\n";
        }
        return 0;
    }

    # cached file contents
    return 0 if $verdict eq 'clean';
    _report_hit($verdict);
    return 1;
}

# Flag a symlink whose target is missing or owned by a different uid than the
# link itself.  Returns 1 when the link was flagged, 0 otherwise.
# maybe only flag if owner is another cPanel user
sub match_link {
    my $link = shift;
    # TODO: kind of silly to stat again, try to avoid this? But we basically
    # need to do this to get link and link target owner uid
    # TODO: if needed we can get clever with UID ranges to weed out false
    # positives.
    my $target_uid = (stat $link)[4];     # undef when the target can't be stat'ed
    my $link_uid   = (lstat $link)[4];
    my $target     = readlink $link;
    $target = '' unless defined $target;

    my $reason;
    if (!defined $target_uid) {
        # Is the link target nonexistent? (could be innocent clutter, or a symlink bomb).
        $reason = 'nonexistent target';
    }
    elsif (defined $link_uid && $target_uid != $link_uid) {
        # is the link target owned by a different user (TODO: root or different non-system user?)
        $reason = 'different uid';
    }
    return 0 unless defined $reason;

    print "\rSYMLINK: $File::Find::name===>$target ($reason)\n";
    linkout "$File::Find::name===>$target\n";
    $info_scarylinks++;
    return 1;
}

{
    # State for the progress throbber; private to progress_tick().
    my $count = 0;
    my $throbcount = 0;
    my @throbsets = (
        [ '◴', '◵', '◶', '◷' ],
        [ '◜', '◝', '◞', '◟' ],
        [ '|', '/', '-', '\\'],
        [ '◰', '◱', '◲', '◳' ],
        [ '☺', '○'],
        [ '⚡', '◊'],
        [ '|', '◊', '◇', '◈', '◆', '○', '◉', '○', '◆', '◈', '◇', '◊' ],
        [ '◙', '◚', '◛', '◖', '◗' ],
        [ '✴', '✳', '✶', '✸', '✺', '✰' ],
        [ '▵', '▴', '△', '▲' ]
    );
    # Pick one glyph set at random for this run.
    my $offset = int(rand(scalar @throbsets));
    my @colors = qw(green green blue blue cyan cyan yellow yellow red red magenta magenta white white );
    my $numcolors = @colors;
    #my @throbber = ('_', '.', ',', '*', 'o', '0', 'O', '0', 'o', '*', ',', '.', '_' );
    my $ticks = @{ $throbsets[$offset] };

    # Advance the throbber by one frame on STDERR; no-op in quiet mode.
    # The color advances each time the glyph set wraps around.
    sub progress_tick {
        return unless $scan_tick;
        my $tick = $count % $ticks;
        if ($tick == 0) { $throbcount += 1; }
        my $color = $throbcount % $numcolors; # needs work
        print STDERR "\r";
        print STDERR colored($throbsets[$offset][$tick], $colors[$color]);
        $count += 1;
        return;
    }
}
# end File::Find callback section

# Option spec: t a b o u e take a value; the rest are flags.  "S" (skip
# symlinks) was missing from the spec although it is documented and read
# below, and "u" lacked its ":" although its value is used as a path.
# Lowercase "s" is still accepted (and ignored) so old invocations keep working.
getopts("t:a:b:o:u:e:pSsdDNrcqh") or main::HELP_MESSAGE();
if ($opt_h) { main::HELP_MESSAGE(); }

# initialization
my $invoke_cwd = cwd();

# validate input. Make sure we have enough info to proceed.
my $ticket = $opt_t || $ENV{'TICKET'};
# NOTE(review): "|| 1" makes the next setting (and $scan_usecache below) true
# whether or not the flag is given, so -p and -c currently change nothing.
# Left as-is because changing it alters what gets scanned -- confirm intent.
$scan_phponly = $opt_p || 1;
$scan_bytes = $opt_b || $default_bytes;
$scan_debug = $opt_D || 0;
$scan_nonmatches = $opt_N || 0;
$scan_symlink = !$opt_S || 0; # symlink scan is default now but can be disabled
$scan_exclude = $opt_e || "";
$scan_redebug = $opt_r || 0;
$scan_usecache = $opt_c || 1;
$scan_tick = !$opt_q || 0;
my $scan_deface = $opt_d || 0;
my $homeprefix = $opt_u || "/home";
#use if $scan_redebug, re 'debug';
if ($scan_redebug) { print "redebug\n"; }

# don't continue unless we have directories to scan.
# Scan targets: -a names accounts (their public_html is scanned), -o names
# extra directories.  At least one of the two is required.
my $in_accts = $opt_a || "";
my $in_otherdirs = $opt_o || "";
if ($in_accts eq "" && $in_otherdirs eq "") {
    print STDERR "Error: can't continue without a scan target in -a or -o!\n";
    main::HELP_MESSAGE();
}

# create output directory; make sure it's writeable
defined $ENV{"HOME"}
    or die "HOME is not set; can't work out where to write output\n";
my $outdir = $ENV{"HOME"} . "/support";
if (defined $ticket && length $ticket) {
    $outdir .= "/" . $ticket;
}
print STDERR "Output directory: $outdir\n";
if (!-e $outdir) {
    make_path($outdir) or die "can't create $outdir: $!\n";
    print STDERR "created output directory\n";
}
(-d $outdir && -w _)
    or die "Output directory $outdir is not a writable directory\n";

# Timestamp for this run's log names (YYYYmmddHHMMSS, local time), built
# in-process instead of shelling out to date(1).
my ($sec, $min, $hour, $mday, $mon, $year) = localtime;
my $datestamp = sprintf '%04d%02d%02d%02d%02d%02d',
    $year + 1900, $mon + 1, $mday, $hour, $min, $sec;

$scan_outfile = $outdir . "/scan-" . $datestamp . ".txt";
$scan_linkfile = $outdir . "/symlinks-" . $datestamp . ".txt";
$scan_debugfile = $outdir . "/debug-" . $datestamp . ".txt";
# The cache is shared between runs; its name carries the signature version
# and the byte limit.
$scan_cachefile = $ENV{"HOME"} . "/support/" . ".fc.cache.$fc_version-$scan_bytes";

# get list of accounts. Build filesystem paths.
# TODO: does the cPanel API let us grab all docroots for an account?
# TODO: A better option altogether may be to scan the entire home directory, excluding mail.
my @accts = split ',', $in_accts;
my @otherdirs = split ",", $in_otherdirs;
my @scandirs;
for my $acct (@accts) {
    push @scandirs, $homeprefix . "/" . $acct . "/public_html";
}
for my $dir (@otherdirs) {
    if (substr($dir, 0, 1) ne "/") {
        # relative path: anchor it at the directory we were invoked from
        push @scandirs, $invoke_cwd . "/" . $dir;
    }
    else {
        push @scandirs, $dir;    # already absolute
    }
}

$| = 1; #unbuffered output
print "Scanning the following directories:\n" . join("\n", @scandirs) . "\n";

if ($scan_usecache) {
    print "Using cachefile $scan_cachefile\n";
    loadcache();
}

# do stuff. find malware. rescue kitties out of trees. do the needful.

# regex malware scan
debugout "# Starting regex scan...\n";
scanout "#\n";
for my $dir (@scandirs) {
    File::Find::find(\&wanted, $dir);
}
#print Dumper %cache;
#print STDERR "\b";

# regex defacement scan
# symlink scan

print "\n# Found $info_regexhits hits! Scan complete. Log is at $scan_outfile\n";
if ($scan_symlink && $info_scarylinks > 0) {
    my $object = $info_scarylinks > 1 ? "symlinks" : "symlink";
    print "# Symlink scan results are at $scan_linkfile. Found $info_scarylinks potentially malicious $object.\n";
}

if ($scan_usecache) {
    # TODO: move this to a sub handler for SIGTERM
    dumpcache();
}

scanout "#\n";
debugout "# Scan complete.\n";
exit 0;