From c070cbd0550c0a1b95ad5e0fb903cccbca63b4e0 Mon Sep 17 00:00:00 2001 From: xiaclo Date: Mon, 14 Jan 2013 14:39:03 +1100 Subject: [PATCH] Update patterns/grok-patterns This is a personal preference, but for web logs, I prefer the parser to capture what it can. Currently with an invalid request, it fails completely rather than capturing the other log information such as date, bytes transferred and HTTP status. This patch captures the invalid request into @fields.rawrequest and leaves @fields.verb, @fields.request and @fields.httpversion as nulls if it cannot be properly parsed. Here is a sample of invalid requests I have from my logs: 115.70.170.86 - - [31/Oct/2012:06:41:24 +1100] "G" 408 0 "-" "-" 165.86.71.20 - - [31/Oct/2012:04:27:01 +1100] "GET http://dis.us.criteo.com/dis/dis.aspx?&t1=sendEvent&c=2&p=3937&p1=v%3D2%26wi%3D7715628%26pt1%3D0%26pt2%3D1%26si%3D1&cb=21664477550&ref=&sc_r=1280x1024&sc_d=32 HTTP/1.0" 400 672 "-" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; .NET4.0E)" Obviously these are not valid requests, and I prefer to handle them this way, but the change is up to you. --- patterns/grok-patterns | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patterns/grok-patterns b/patterns/grok-patterns index 13688ec8e..6291fd080 100755 --- a/patterns/grok-patterns +++ b/patterns/grok-patterns @@ -91,7 +91,7 @@ QS %{QUOTEDSTRING} # Log formats SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}: -COMBINEDAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)" %{NUMBER:response} (?:%{NUMBER:bytes}|-) %{QS:referrer} %{QS:agent} +COMBINEDAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) %{QS:referrer} %{QS:agent} # Log Levels LOGLEVEL ([T|t]race|TRACE|[D|d]ebug|DEBUG|[N|n]otice|NOTICE|[I|i]nfo|INFO|[W|w]arn?(?:ing)?|WARN?(?:ING)?|[E|e]rr?(?:or)?|ERR?(?:OR)?|[C|c]rit?(?:ical)?|CRIT?(?:ICAL)?|[F|f]atal|FATAL|[S|s]evere|SEVERE)