awk で apache のアクセスログを解析してみる修行
動的ファイルへのアクセスを拾って、接続元の IP アドレスごとのファイルに振り分けてみたり。
# Split an Apache access log into one output file per client address,
# keeping only requests whose "extension" is not in the static-content
# exclusion table below.
#
# Field positions used: $1 client address, $4 "[dd/Mon/yyyy:hh:mm:ss",
# $7 request URL, $9 status code.
# NOTE(review): this presumably targets the Common/Combined Log Format --
# confirm against the actual LogFormat in use.
#
# Output line:  yyyymmdd hhmmss ip status path query {ext}
# Output file:  prefix "_" ip
# NOTE(review): `prefix` is not assigned anywhere in this script; presumably
# it is supplied on the command line (e.g. -v prefix=NAME). When it is unset
# the files are simply named "_<ip>".

BEGIN {
    # Extensions (text after the last "." of the path) that are skipped.
    n = split("gif jpg swf html css js ico txt csv", names, " ")
    for (i = 1; i <= n; i++)
        except_ext[names[i]] = 1
    # A path without any "." is used as its own "extension", so this entry
    # skips requests for the bare root path "/".
    except_ext["/"] = 1

    # NOTE(review): populated but never consulted by any rule below.
    except_ip[""] = 1

    # Month abbreviation -> two-digit month number.
    n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
    for (i = 1; i <= n; i++)
        month[names[i]] = sprintf("%02d", i)
}

# Parse the fields needed for filtering and output.
{
    ip = $1
    retcode = $9

    # Splitting "[dd/Mon/yyyy:hh:mm:ss" on "[", "/" and ":" leaves a[1]
    # empty, then a[2]=dd a[3]=Mon a[4]=yyyy a[5]=hh a[6]=mm a[7]=ss.
    split($4, a, /[\[:\/]/)
    day = a[4] month[a[3]] a[2]
    time = a[5] a[6] a[7]

    # Separate the URL at the first "?" into path and query string.
    url = $7
    qpos = index(url, "?")
    if (qpos > 0) {
        path = substr(url, 1, qpos - 1)
        query = substr(url, qpos + 1)
    } else {
        path = url
        query = ""
    }

    # Extension = text after the last "." in the path; when there is no
    # such suffix the whole path is used (this is what makes the "/" entry
    # in except_ext work).
    if (match(path, /\.[^.]+$/))
        ext = substr(path, RSTART + 1)
    else
        ext = path
}

# Drop excluded extensions. `in` tests membership without creating a new
# array element for every extension that is looked up.
(ext in except_ext) { next }

# Write the record to the per-client file. The redirection target is
# parenthesized because an unparenthesized concatenation after ">" is
# ambiguous and is parsed differently by different awk implementations.
{
    print day, time, ip, retcode, path, query, "{" ext "}" > (prefix "_" ip)
}