/* Richard A. DeVenezia
 * January 10, 2005
 *
 * Use SAS to read an Apache log file in common format
 * Related info can be found here: http://www.devenezia.com/perl/http-log
 *
 * Posted to SAS-L thread "Parsing Apache log files", 10JAN2005
 */

/*
 * This version 9 SAS code uses the prx* functions to parse each line
 * of an Apache log file.
 *
 * NOTE: the pattern below requires nine fields, the last two being the
 * quoted referer and user-agent strings.  That is Apache's "combined"
 * layout (the common layout followed by those two quoted fields); a line
 * that stops after the bytes-sent field will NOT match and is reported
 * in the SAS log instead of being written to the output table.
 *
 * Input : the text file named by the macro variable LOGFILE.
 * Output: data set WORK.LOG with one row per matching line and the nine
 *         character columns clientAddress .. clientSoftware.
 */

%let logfile = c:\logs\access_log;

data log;
  infile "&logfile" lrecl=32767 end=end;

  /* Compile the pattern.  Capture buffers, in order:
   *   1 client address   2 rfc1413 identity   3 user name
   *   4 [timestamp]      5 "request line"     6 status code
   *   7 bytes sent       8 "referer"          9 "user agent"
   */
  rx = prxParse ('/^(\S+) (\S+) (\S+) \[(.+)\] \"(.+)\" (\S+) (\S+) \"(.*)\" \"(.*)\"/');

  /* Read the whole file inside a single DATA step iteration.
   * _n_ is reused as the running line counter for the diagnostic below.
   */
  do _n_ = 1 by 1 until (end);

    /* read current line into the _infile_ buffer */
    input;

    /* One match per line is sufficient: prxMatch returns 0 when the line
     * does not fit the pattern, and prxPosN then reads the capture
     * buffers of that same match.
     */
    if prxMatch (rx, _infile_) then do;
      clientAddress  = prxPosN (rx, 1, _infile_);
      rfc1413        = prxPosN (rx, 2, _infile_);
      username       = prxPosN (rx, 3, _infile_);
      localTime      = prxPosN (rx, 4, _infile_);
      httpRequest    = prxPosN (rx, 5, _infile_);
      statusCode     = prxPosN (rx, 6, _infile_);
      bytesSent      = prxPosN (rx, 7, _infile_);
      referer        = prxPosN (rx, 8, _infile_);
      clientSoftware = prxPosN (rx, 9, _infile_);

      /* output the log parts to a row in the SAS table */
      output;
    end;
    else
      put _n_ 'data line in Apache log file is not in combined format';
  end;

  /* Every line has been consumed; end the step explicitly rather than
   * relying on a second pass running into end-of-file.
   */
  stop;

  /* Explicit list (same columns, same order as the assignments above) so
   * the result does not depend on the order in which variables are created.
   */
  keep clientAddress rfc1413 username localTime httpRequest
       statusCode bytesSent referer clientSoftware;
run;

/*
 * You should tweak as needed to reduce the size of your SAS table
 * and change some of the variables to numbers by using the INPUT()
 * i.e.
 *   _bytesSent = prxPosN (rx, 7, _infile_);
 *   bytesSent  = input (_bytesSent,best12.);
 *   drop _:;
 *
 * NOTE(review): the character columns take whatever default length SAS
 * assigns to a prxPosN result because no LENGTH statement is given.
 * If long request, referer or user-agent values come out truncated, add
 * a LENGTH statement before the first assignment -- confirm the default
 * length against the PRXPOSN documentation for your SAS release.
 */