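A typical line in the Apache access log (here the combined log format, with an extra numeric field after the client IP) looks like this: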
10.75.80.77 2 - - [17/Apr/2012:08:52:32 -0400] "GET /EASE/admin/login.jsf HTTP/1.1" 302 - "-" "Mozilla/5.0"
So here is the regex to parse it:
# Pattern for one access-log line of the shape shown above.  Only GET and POST
# requests match.  Written with /x so every field sits on its own line.
# The dot in the HTTP version is escaped: the original (\d.\d) also accepted
# any character between the two digits (e.g. "HTTP/1x1").
my $expression = qr{
    ([0-9.]+)                     # client IP address
    \s+ \d* \s? - \s - \s         # optional numeric field, then the two "-" fields
    \[ (\d+) / (\w{3}) / (\d{4})  # day of month, month name, year
    : (\d+) : .*                  # hour of day; minutes and seconds are skipped
    \s [-+]\d+ \] \s              # timezone offset
    " (POST|GET) \s (.+) \s HTTP/ (\d\.\d) " \s   # method, request path, protocol version
    (\d{3}) \s                    # response status
    (\d+|-) \s                    # response size, or "-"
    " (\S+) " \s                  # referrer
    " (.+) "                      # user agent
}x;

# The pattern is wrapped in one extra capture group, so $all receives the
# whole matched text and the remaining variables follow the groups in order.
# On a failed match the list is empty and every variable is undef.
my (
    $all,       $ip,      $dayofmonth, $month,    $year,
    $hourofday, $reqtype, $req,        $protocol, $response,
    $size,      $ref,     $agent
) = @rec = $_ =~ /($expression)/i;
Here is an example of yet another parser for Apache logs, one that picks out all the POST and GET requests:
use Data::Dumper;
use Text::Table;
# Print a horizontal rule made of "-" characters, followed by a newline.
# The optional argument is the rule width; when it is missing or false
# (undef, 0, "") the package variable $W is used instead.
sub line {
    my ($width) = @_;
    $width ||= $W;
    print "-" for 1 .. $width;
    print "\n";
}
# Compile-time setup: fix the rule width, reset the counters, and print a
# banner before any of the main code runs.
BEGIN {
    # Width of the rules drawn by line(); line() falls back to $W when it is
    # called without an argument.
    our $W = 70;

    # Counter table filled in by the main code.
    %row = ();

    # Banner: two blank lines, then one @ARGV entry between two rules.
    # $cc has not been set yet at this point, so the subscript is element 0;
    # the post-increment leaves $cc at 1 afterwards.
    print "\n" x 2;
    line($W);
    printf "[%-35s]\n", $ARGV[ $cc++ ];
    line($W);
}
my @rec = ();

# Pattern for one access-log line such as
#   10.75.80.77 2 - - [17/Apr/2012:08:52:32 -0400] "GET /EASE/admin/login.jsf HTTP/1.1" 302 - "-" "Mozilla/5.0"
# Only GET and POST requests match; every other line is reported as skipped.
# The dot in the HTTP version is escaped: the original (\d.\d) also accepted
# any character between the two digits (e.g. "HTTP/1x1").
my $expression = qr{
    ([0-9.]+)                     # client IP address
    \s+ \d* \s? - \s - \s         # optional numeric field, then the two "-" fields
    \[ (\d+) / (\w{3}) / (\d{4})  # day of month, month name, year
    : (\d+) : .*                  # hour of day; minutes and seconds are skipped
    \s [-+]\d+ \] \s              # timezone offset
    " (POST|GET) \s (.+) \s HTTP/ (\d\.\d) " \s   # method, request path, protocol version
    (\d{3}) \s                    # response status
    (\d+|-) \s                    # response size, or "-"
    " (\S+) " \s                  # referrer
    " (.+) "                      # user agent
}x;

# The pattern is wrapped in one extra capture group, so $all receives the
# whole matched text and the remaining variables follow the groups in order.
my (
    $all,       $ip,      $dayofmonth, $month,    $year,
    $hourofday, $reqtype, $req,        $protocol, $response,
    $size,      $ref,     $agent
) = @rec = $_ =~ /($expression)/i;

#print $dayofmonth, "\n";

# @rec is empty exactly when the line did not match.  The original tested $!
# here, which is the OS error variable and says nothing about the regex, so
# matching lines were normally reported as skipped and never counted.
if (@rec) {
    # First path segment of the request, e.g. "EASE" for /EASE/admin/login.jsf.
    my ($approot) = $req =~ m!^\/(\w+)\/.*$!x;

    # Host part of the referrer URL.
    my ($refhost) = $ref =~ m!https?\:\/\/([a-z.]+)\/.+!x;

    # Either capture can fail (a request such as "/" or a referrer of "-");
    # use the empty string explicitly, which is also what undef would have
    # produced in the sprintf calls and hash keys below.
    $approot = '' unless defined $approot;
    $refhost = '' unless defined $refhost;

    # Date label used as the second-level key of every counter.
    my $v = sprintf "%s %s, %s", $month, $dayofmonth, $year;

    my $hkey = sprintf "%s-%s", $approot,   $refhost;
    my $dkey = sprintf "%s-%s", $hourofday, $hkey;

    #print Dumper(\@rec);
    #die "deeeee", $v;

    # All counters are grouped by response status first, then by date.
    $row{$response}{'byreq'}{$v}{$req}++;
    $row{$response}{'byreqref'}{$v}{$req}{$ref}++;
    $row{$response}{'byhodreq'}{$v}{$hourofday}++;
    $row{$response}{'byipsub'}{$v}{$ip}++;
    $row{$response}{'bycontext'}{$v}{$approot}++;
    $row{$response}{'byrefhost'}{$v}{$hkey}++;
    $row{$response}{'byrefhosthod'}{$v}{$dkey}++;
}
else {
    print "skiping $_\n";
}
No comments:
Post a Comment