Wednesday, April 18, 2012

Apache Log Regex for simpleminded parsing.

A line in the common log format looks like this:
10.75.80.77 2 - - [17/Apr/2012:08:52:32 -0400] "GET /EASE/admin/login.jsf HTTP/1.1" 302 - "-" "Mozilla/5.0"
So here is the regex to parse this:
# Matcher for one access-log line.  Capture groups, in order: client IP,
# day of month, month name, year, hour, request method, request path,
# HTTP version, status code, response size, referer, user agent.
# Written with /x so each field sits on its own commented line.
my $expression = qr{
    ([0-9.]+)                                # client IP address
    \s+ \d* \s?                              # optional numeric field after the IP
    - \s - \s                                # two literal "-" fields
    \[ (\d+) / (\w{3}) / (\d{4}) : (\d+) :   # day / month / year : hour
    .* \s [-+] \d+ \]                        # rest of the timestamp, signed offset
    \s " (POST|GET) \s (.+) \s HTTP/ (\d\.\d) "   # method, path, version (literal dot)
    \s (\d{3})                               # status code
    \s (\d+|-)                               # size in digits, or "-"
    \s " (\S+) "                             # referer
    \s " (.+) "                              # user agent
}x;

# The extra pair of parentheses makes the whole matched text the first
# element, so @rec holds 13 values on success and is empty on failure.
# No /i here: a modifier on the outer match does not reach into the
# precompiled qr object, so the match was always case-sensitive.
my (
 $all,       $ip,       $dayofmonth, $month,    $year,
 $hourofday, $reqtype, $req,        $protocol, $response,
 $size,      $ref,     $agent
) = @rec = $_ =~ /($expression)/;
Here is an example of yet another parser for Apache logs, one that picks out all the POST and GET requests:
use Data::Dumper;
use Text::Table;

# Print a horizontal rule: the requested number of "-" characters, one
# print call per character, followed by a newline.  A missing or false
# count falls back to the package variable $W.
sub line {
 my $width = shift(@_) || $W;
 print "-" for 1 .. $width;
 print "\n";

}

# Compile-time setup: clears the %row tally hash, sets the rule width $W,
# and prints a banner with one command-line argument between two rules.
# NOTE(review): presumably the script is run under "perl -n", so this runs
# once before the per-line body - confirm against how it is invoked.
BEGIN {
 %row = ();
 our $W = 70;
 print "\n\n";
 line($W);
 # $cc is not assigned anywhere in this block before it is used as an
 # index; while it is undefined the element printed is $ARGV[0].
 printf( "[%-35s]\n", $ARGV[$cc] );
 line($W);
 ++$cc;
}
# Per-line body: parse the log line held in $_ and, when it matches, bump
# the counters in %row.
# NOTE(review): $_ is never assigned here; presumably the script runs under
# "perl -n" so $_ is the current input line - confirm.
my @rec = ();

# Matcher for one access-log line.  Capture groups, in order: client IP,
# day of month, month name, year, hour, request method, request path,
# HTTP version, status code, response size, referer, user agent.
my $expression = qr{
    ([0-9.]+)                                # client IP address
    \s+ \d* \s?                              # optional numeric field after the IP
    - \s - \s                                # two literal "-" fields
    \[ (\d+) / (\w{3}) / (\d{4}) : (\d+) :   # day / month / year : hour
    .* \s [-+] \d+ \]                        # rest of the timestamp, signed offset
    \s " (POST|GET) \s (.+) \s HTTP/ (\d\.\d) "   # method, path, version (literal dot)
    \s (\d{3})                               # status code
    \s (\d+|-)                               # size in digits, or "-"
    \s " (\S+) "                             # referer
    \s " (.+) "                              # user agent
}x;

# The extra pair of parentheses makes the whole matched text the first
# element, so @rec holds 13 values on success and is empty on failure.
# No /i here: a modifier on the outer match does not reach into the
# precompiled qr object, so the match was always case-sensitive.
my (
 $all,       $ip,       $dayofmonth, $month,    $year,
 $hourofday, $reqtype, $req,        $protocol, $response,
 $size,      $ref,     $agent
) = @rec = $_ =~ /($expression)/;

#print $dayofmonth, "\n";
# Branch on whether the regex matched.  The previous test of $! looked at
# the last OS error code, which says nothing about the match result.
if (@rec) {
 # First path segment of the request, e.g. "EASE" in /EASE/admin/login.jsf.
 my ($approot) = $req =~ m!^\/(\w+)\/.*$!x;
 # Host part of an http(s) referer; no match for referers such as "-".
 my ($refhost) = $ref =~ m!https?\:\/\/([a-z.]+)\/.+!x;
 # An unmatched capture is undef; use '' so the keys built below are the
 # same strings as before without relying on undef stringification.
 $approot = '' unless defined $approot;
 $refhost = '' unless defined $refhost;

 # Date label used as the second-level key, e.g. "Apr 17, 2012".
 my $v = sprintf "%s %s, %s", $month, $dayofmonth, $year;
 $row{$response}{'byreq'}{$v}{$req}++;
 my $hkey = sprintf "%s-%s", $approot, $refhost;
 my $dkey = sprintf "%s-%s", $hourofday,$hkey;
 #print Dumper(\@rec);

 # Counters are grouped by status code, then view name, then date label.
 $row{$response}{'byreqref'}{$v}{$req}{$ref}++;
 $row{$response}{'byhodreq'}{$v}{$hourofday}++;
 $row{$response}{'byipsub'}{$v}{$ip}++;
 $row{$response}{'bycontext'}{$v}{$approot}++;
 $row{$response}{'byrefhost'}{$v}{$hkey}++;
 $row{$response}{'byrefhosthod'}{$v}{$dkey}++;

}
else {
 print "skiping $_\n";
}

No comments:

Post a Comment