A line in the common log format would be:
10.75.80.77 2 - - [17/Apr/2012:08:52:32 -0400] "GET /EASE/admin/login.jsf HTTP/1.1" 302 - "-" "Mozilla/5.0"
So here is the regex to parse this:
# Pattern for one access-log line.  It has 12 capture groups, in the order
# of the variables in the destructuring assignment below (after $all).
my $expression = qr{
    ([0-9.]+)                      # client IP address
    \s+ \d* \s?                    # optional numeric field after the IP
    - \s - \s                      # two literal dash fields
    \[ (\d+) / (\w{3}) / (\d{4})   # day of month, month name, year
    : (\d+) :                      # hour of day
    .* \s [-+] \d+ \]              # rest of the time, then the zone offset
    \s " (POST|GET)                # request method (only these two are parsed)
    \s (.+)                        # request path
    \s HTTP/ (\d\.\d) "            # protocol version; the dot is literal
    \s (\d{3})                     # response status
    \s (\d+|-)                     # response size, or a dash
    \s " (\S+) "                   # referer
    \s " (.+) "                    # user agent
}x;

# The outer parentheses capture the whole matched text into $all.
# No /i modifier here: a qr// object keeps its own flags when it is
# interpolated, so an outer /i never applied to the pattern anyway.
my (
    $all,       $ip,      $dayofmonth, $month,    $year,
    $hourofday, $reqtype, $req,        $protocol, $response,
    $size,      $ref,     $agent
) = @rec = $_ =~ /($expression)/;
Here is an example of yet another parser implementation for Apache logs; it greps all the POSTs and GETs.
use Data::Dumper;
use Text::Table;
# Print a horizontal rule made of dashes, followed by a newline.
# The optional first argument is the rule width; when it is missing or
# false (undef, 0, '') the width is taken from the global $W instead.
sub line {
    my $width = shift || $W;

    # One print call per dash, then one for the line terminator.
    print "-" for 1 .. $width;
    print "\n";
}
# Compile-time setup: initialise the globals used by the rest of the script
# and print a banner (blank lines, rule, bracketed name, rule).
BEGIN {
# Default rule width; line() falls back to this package variable.
our $W = 70;
# Start with an empty global tally hash.
%row = ();
print "\n\n";
line($W);
# Show the @ARGV element at index $cc, left-justified in a 35-column field.
# NOTE(review): $cc is not initialised in the visible code, so it is undef
# here and indexes element 0 - presumably the first input file name; confirm.
printf "[%-35s]\n", $ARGV[$cc];
line($W);
$cc++;
}
# Parse the log line held in $_ and, when it matches, bump the counters in
# the global %row hash; otherwise report the line as skipped.
# NOTE(review): nothing visible here loops over the input - presumably the
# script is run with `perl -n` so this body executes once per line; confirm.

# Captures of the current match: the whole match first, then the 12 fields.
my @rec = ();

# Pattern for one access-log line.  It has 12 capture groups, in the order
# of the variables in the destructuring assignment below (after $all).
my $expression = qr{
    ([0-9.]+)                      # client IP address
    \s+ \d* \s?                    # optional numeric field after the IP
    - \s - \s                      # two literal dash fields
    \[ (\d+) / (\w{3}) / (\d{4})   # day of month, month name, year
    : (\d+) :                      # hour of day
    .* \s [-+] \d+ \]              # rest of the time, then the zone offset
    \s " (POST|GET)                # request method (only these two are parsed)
    \s (.+)                        # request path
    \s HTTP/ (\d\.\d) "            # protocol version; the dot is literal
    \s (\d{3})                     # response status
    \s (\d+|-)                     # response size, or a dash
    \s " (\S+) "                   # referer
    \s " (.+) "                    # user agent
}x;

# The outer parentheses capture the whole matched text into $all.
# No /i modifier here: a qr// object keeps its own flags when it is
# interpolated, so an outer /i never applied to the pattern anyway.
my (
    $all,       $ip,      $dayofmonth, $month,    $year,
    $hourofday, $reqtype, $req,        $protocol, $response,
    $size,      $ref,     $agent
) = @rec = $_ =~ /($expression)/;

#print $dayofmonth, "\n";

# @rec is non-empty exactly when the line matched.  ($! is the OS error
# number; it says nothing about whether a regex matched.)
if (@rec) {
    # First path segment of the request, e.g. "EASE" for "/EASE/admin/...".
    my ($approot) = $req =~ m{^/(\w+)/.*$}x;

    # Host part of an http(s) referer URL.
    my ($refhost) = $ref =~ m{https?://([a-z.]+)/.+}x;

    # Either sub-match can fail (a referer of "-", a request of "/").
    # Use an empty string then, which is what undef turned into in the keys.
    $approot = '' unless defined $approot;
    $refhost = '' unless defined $refhost;

    # Per-day bucket key, e.g. "Apr 17, 2012".
    my $v = sprintf "%s %s, %s", $month, $dayofmonth, $year;

    # Composite keys: "<approot>-<refhost>" and "<hour>-<approot>-<refhost>".
    my $hkey = sprintf "%s-%s", $approot,   $refhost;
    my $dkey = sprintf "%s-%s", $hourofday, $hkey;

    #print Dumper(\@rec);
    #die "deeeee", $v;

    # All tallies are grouped by response status, then by day.
    $row{$response}{'byreq'}{$v}{$req}++;
    $row{$response}{'byreqref'}{$v}{$req}{$ref}++;
    $row{$response}{'byhodreq'}{$v}{$hourofday}++;
    $row{$response}{'byipsub'}{$v}{$ip}++;
    $row{$response}{'bycontext'}{$v}{$approot}++;
    $row{$response}{'byrefhost'}{$v}{$hkey}++;
    $row{$response}{'byrefhosthod'}{$v}{$dkey}++;
}
else {
    print "skiping $_\n";
}
No comments:
Post a Comment