-
-
Save akr4/1297970 to your computer and use it in GitHub Desktop.
#daimonscala 19-2 "Apache access_log(combined) parser"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Parser for Apache `access_log` files written in the "combined" log format.
  *
  * `AccessLogParser` holds the grammar and `main` runs it over a small built-in sample.
  */
object LogParser {
  import java.net._
  import org.joda.time.DateTime
  import org.joda.time.format.DateTimeFormat

  /** One request record of the access log.
    *
    * @param ipAddress client address (first field of the record)
    * @param ident     identity field exactly as logged (`-` in the sample data)
    * @param user      user field exactly as logged (`-` in the sample data)
    * @param time      timestamp taken from the bracketed `[dd/MMM/yyyy:HH:mm:ss Z]` field
    * @param method    HTTP method, upper-case letters only (e.g. `GET`)
    * @param uri       request target
    * @param version   protocol token that closes the request line (e.g. `HTTP/1.1`)
    * @param status    numeric response status
    * @param bytes     response size; a logged `-` is stored as `0`
    * @param referrer  content of the quoted referrer field, without the surrounding quotes
    * @param userAgent content of the quoted user-agent field, without the surrounding quotes
    */
  case class Access(
    ipAddress: InetAddress,
    ident: String,
    user: String,
    time: DateTime,
    method: String,
    uri: URI,
    version: String,
    status: Int,
    bytes: Int,
    referrer: String,
    userAgent: String
  )

  import util.parsing.combinator._

  /** Grammar for the combined log format; one [[Access]] is produced per record. */
  object AccessLogParser extends RegexParsers {
    // Only blanks and tabs are skipped between tokens: line breaks must stay visible
    // to the grammar because they terminate a record (see `eol`).
    override val whiteSpace = """[ \t]+""".r

    // Layout of the bracketed timestamp. The US locale is pinned so that English month
    // abbreviations such as "Oct" parse regardless of the JVM's default locale.
    val timeFormat = DateTimeFormat.forPattern("dd/MMM/yyyy:HH:mm:ss Z").withLocale(java.util.Locale.US)

    /** End of a record: a line break, or the end of the input so that a final record
      * without a trailing newline is still accepted.
      */
    def eol: Parser[Any] = ("""\r?\n""".r | """\z""".r) ^^^ ""

    /** Zero or more records. */
    def lines: Parser[List[Access]] = rep(line)

    /** A single record: the eleven fields in log order, followed by `eol`. */
    def line: Parser[Access] =
      ipAddress ~ ident ~ user ~ time ~ method ~ uri ~ version ~ status ~ bytes ~ referrer ~ userAgent <~ eol ^^ {
        case ipAddress ~ ident ~ user ~ time ~ method ~ uri ~ version ~ status ~ bytes ~ referrer ~ userAgent =>
          Access(ipAddress, ident, user, time, method, uri, version, status, bytes, referrer, userAgent)
      }

    /** Dotted-quad IPv4 address. The dots are escaped so they match only a literal `.`. */
    def ipAddress: Parser[InetAddress] =
      """[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}""".r ^^ (s => InetAddress.getByName(s))

    /** Identity field: any run of non-blank characters (`-` when not available). */
    def ident: Parser[String] = """\S+""".r

    /** User field: any run of non-blank characters (`-` when not available). */
    def user: Parser[String] = """\S+""".r

    /** Bracketed timestamp, converted with `timeFormat`. */
    def time: Parser[DateTime] = "[" ~> """\S+ [^ \]]+""".r <~ "]" ^^ (s => timeFormat.parseDateTime(s))

    /** Opening quote of the request line followed by the HTTP method. */
    def method: Parser[String] = "\"" ~> """[A-Z]+""".r

    /** Request target. */
    def uri: Parser[URI] = """\S+""".r ^^ (s => new URI(s))

    /** Protocol token followed by the closing quote of the request line. */
    def version: Parser[String] = """[^ "]+""".r <~ "\""

    /** Response status code. */
    def status: Parser[Int] = """\d+""".r ^^ (_.toInt)

    /** Response size; `-` (nothing sent) becomes 0. Anything other than `-` or digits is a
      * parse failure rather than a `NumberFormatException`.
      */
    def bytes: Parser[Int] = """-|\d+""".r ^^ {
      case "-" => 0
      case digits => digits.toInt
    }

    /** Quoted referrer field, without the surrounding quotes. */
    def referrer: Parser[String] = quoted

    /** Quoted user-agent field, without the surrounding quotes. */
    def userAgent: Parser[String] = quoted

    // Content of a double-quoted field. It may be empty and may contain backslash escapes
    // such as \" (Apache escapes embedded quotes that way); escapes are returned verbatim.
    // The pattern is written as "plain run, then (escape + plain run)*" so that long values
    // are consumed by simple character-class loops.
    private def quoted: Parser[String] = "\"" ~> """[^"\\]*(?:\\.[^"\\]*)*""".r <~ "\""

    /** Parses a complete access log.
      *
      * @param json the raw log text (the parameter name is historical; the input is not JSON)
      * @return the parsed records on success, otherwise a failure describing where parsing stopped
      */
    def parse(json: String): ParseResult[List[Access]] = parseAll(lines, json)
  }

  /** Parses a built-in three-record sample and prints the resulting records, or the
    * parser's failure report on standard error.
    */
  def main(args: Array[String]): Unit = {
    val accessLog =
      """66.249.69.220 - - [03/Oct/2011:01:22:54 +0900] "GET /blog/23/ HTTP/1.1" 200 22716 "-" "SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)"
        |64.233.172.34 - - [16/Oct/2011:04:03:27 +0900] "GET /api/search/?format=atom&q=testtest HTTP/1.1" 200 20 "-" "Rome Client (http://tinyurl.com/64t5n) Ver: UNKNOWN AppEngine-Google; (+http://code.google.com/appengine; appid: xxxxx)"
        |64.233.172.36 - - [19/Oct/2011:05:18:52 +0900] "GET / HTTP/1.1" 304 - "http://twitter.com/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1"
        |""".stripMargin

    AccessLogParser.parse(accessLog) match {
      case AccessLogParser.Success(accesses, _) => println(accesses)
      // Failure and Error both render position and message via toString.
      case failed => Console.err.println(failed)
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment