No Separate UsageLogParser Assembly

Jul 10, 2009 at 6:56 AM

Hi Stefan,

Great solution! Let's get rid of the separate UsageLogParser assembly: it only complicates deployment and brings no benefit. Here's a 1:1 VB.NET counterpart of the parser, so you can include it directly in your ETL package. I hope it's useful.

Cheers, Pav

---------------------------------------------------------------

Imports System
Imports System.Data
Imports System.Collections.Generic
Imports System.IO
Imports System.Text
Imports System.ComponentModel

''' <summary>
''' Reads a binary log file into <see cref="LogRecord"/> objects and exposes
''' them as a single-table <see cref="DataSet"/> (one row per record).
''' </summary>
Public Class MOSS2007LogParser
    ' Number of bytes skipped at the start of the file before the first record.
    Private Const FILEHEADERLENGTH As Integer = 300
    ' Number of bytes read as the header of each record.
    Private Const RECORDHEADERLENGTH As Integer = 50
    ' Records produced by the most recent ReadLogFile call.
    Private _logRecords As List(Of LogRecord) = New List(Of LogRecord)

    ''' <summary>
    ''' Parses the log file at <paramref name="logFilePath"/> and returns its records
    ''' in a DataSet named "dsSTSLogs" containing one table named "dtSTSLogs".
    ''' </summary>
    ''' <param name="logFilePath">Path of the log file to parse.</param>
    ''' <returns>
    ''' The populated DataSet. All columns use the default DataColumn type.
    ''' If the file does not exist the table is returned with no rows.
    ''' </returns>
    Public Function GetLogDataSet(ByVal logFilePath As String) As DataSet

        ReadLogFile(logFilePath)

        Dim dsSTSLogs As DataSet = New DataSet("dsSTSLogs")
        Dim dtSTSLogs As DataTable = New DataTable("dtSTSLogs")

        ' Column order is part of the returned schema; keep it stable.
        Dim columnNames As String() = New String() { _
            "TimeStamp", "SiteGUID", "SiteUrl", "Web", "Document", "UserName", _
            "QueryString", "Referral", "UserAgent", "Command", "HttpStatus", "BytesSent"}

        For Each columnName As String In columnNames
            dtSTSLogs.Columns.Add(New DataColumn(columnName))
        Next

        dsSTSLogs.Tables.Add(dtSTSLogs)

        For Each record As LogRecord In _logRecords
            Dim drRecord As DataRow = dtSTSLogs.NewRow()

            drRecord("TimeStamp") = record.TimeStamp
            drRecord("SiteGUID") = record.SiteGUID
            drRecord("SiteUrl") = record.SiteUrl
            drRecord("Web") = record.Web
            drRecord("Document") = record.Document
            drRecord("UserName") = record.UserName
            drRecord("QueryString") = record.QueryString
            drRecord("Referral") = record.Referral
            drRecord("UserAgent") = record.UserAgent
            drRecord("Command") = record.Command
            drRecord("HttpStatus") = record.Status
            drRecord("BytesSent") = record.BytesSent

            dtSTSLogs.Rows.Add(drRecord)
        Next

        Return dsSTSLogs
    End Function

    ''' <summary>
    ''' Replaces the contents of the internal record list with the records found in
    ''' the given file. A missing file results in an empty list.
    ''' </summary>
    ''' <param name="logFilePath">Path of the log file to read.</param>
    Private Sub ReadLogFile(ByVal logFilePath As String)
        ' Clear first: otherwise a missing file would leave the records of a
        ' previous call in place and they would be returned for the wrong file.
        _logRecords.Clear()

        If Not File.Exists(logFilePath) Then Exit Sub

        ' Open a stream to the log file
        ' I recommend using a Memory-mapped file here, as the files can grow quite large.
        ' http://www.winterdom.com/dev/dotnet/ shows a fairly good implementation [see: FileMap]

        ' Using blocks release the file handle even when parsing a record throws.
        Using s As Stream = File.OpenRead(logFilePath)
            Using br As BinaryReader = New BinaryReader(s)
                ' Skip the file header; its content is not used.
                br.ReadBytes(FILEHEADERLENGTH)

                Do While s.Position < s.Length
                    ' Read the record header array
                    Dim recordHeader As Byte() = br.ReadBytes(RECORDHEADERLENGTH)
                    Dim headerData As LogRecordHeader = New LogRecordHeader(recordHeader)

                    ' Get the length of the current record and read the array
                    Dim recordLength As Integer = headerData.RecordLength()
                    Dim recordData As Byte() = br.ReadBytes(recordLength)

                    _logRecords.Add(New LogRecord(headerData, recordData))
                Loop
            End Using
        End Using

    End Sub
End Class

''' <summary>
''' Decodes the numeric fields of a record header byte array: the lengths of the
''' variable-length fields of the record that follows, plus the HTTP status and
''' the number of bytes sent.
''' </summary>
Public Class LogRecordHeader

    ' Fixed field widths used when computing the record length.
    Public Const STANDARD_GUID_LENGTH As Integer = 36
    Public Const STANDARD_TIMESTAMP_LENGTH As Integer = 8

    ' Byte positions of the individual values inside the header array.
    Public Const SITEURL_OFFSET As Integer = 12
    Public Const WEB_OFFSET As Integer = 14
    Public Const DOC_OFFSET As Integer = 16
    Public Const BYTESSENT_OFFSET As Integer = 20
    Public Const HTTPSTATUS_OFFSET As Integer = 24
    Public Const USERNAME_OFFSET As Integer = 26
    Public Const QUERYSTRING_OFFSET As Integer = 28
    Public Const REFERRAL_OFFSET As Integer = 30
    Public Const USERAGENT_OFFSET As Integer = 32
    Public Const COMMAND_OFFSET As Integer = 34

    ' Number of variable-length fields that contribute to the record length.
    Private Const VARIABLE_FIELD_COUNT As Integer = 8

    Private _rawHeader As Byte()
    Private _siteUrlLen As Integer
    Private _webLen As Integer
    Private _docLen As Integer
    Private _userNameLen As Integer
    Private _sentBytes As Long
    Private _statusCode As Integer
    Private _queryStringLen As Integer
    Private _referralLen As Integer
    Private _userAgentLen As Integer
    Private _commandLen As Integer

    ''' <summary>Length value read at SITEURL_OFFSET.</summary>
    Public ReadOnly Property SiteUrlLength() As Integer
        Get
            Return _siteUrlLen
        End Get
    End Property

    ''' <summary>Length value read at WEB_OFFSET.</summary>
    Public ReadOnly Property WebLength() As Integer
        Get
            Return _webLen
        End Get
    End Property

    ''' <summary>Length value read at DOC_OFFSET.</summary>
    Public ReadOnly Property DocLength() As Integer
        Get
            Return _docLen
        End Get
    End Property

    ''' <summary>Length value read at USERNAME_OFFSET.</summary>
    Public ReadOnly Property UserNameLength() As Integer
        Get
            Return _userNameLen
        End Get
    End Property

    ''' <summary>32-bit unsigned value read at BYTESSENT_OFFSET.</summary>
    Public ReadOnly Property BytesSent() As Long
        Get
            Return _sentBytes
        End Get
    End Property

    ''' <summary>16-bit unsigned value read at HTTPSTATUS_OFFSET.</summary>
    Public ReadOnly Property HttpStatus() As Integer
        Get
            Return _statusCode
        End Get
    End Property

    ''' <summary>Length value read at QUERYSTRING_OFFSET.</summary>
    Public ReadOnly Property QueryStringLength() As Integer
        Get
            Return _queryStringLen
        End Get
    End Property

    ''' <summary>Length value read at REFERRAL_OFFSET.</summary>
    Public ReadOnly Property ReferralLength() As Integer
        Get
            Return _referralLen
        End Get
    End Property

    ''' <summary>Length value read at USERAGENT_OFFSET.</summary>
    Public ReadOnly Property UserAgentLength() As Integer
        Get
            Return _userAgentLen
        End Get
    End Property

    ''' <summary>Length value read at COMMAND_OFFSET.</summary>
    Public ReadOnly Property CommandLength() As Integer
        Get
            Return _commandLen
        End Get
    End Property

    ''' <summary>
    ''' Decodes all header values from <paramref name="recordHeader"/>.
    ''' </summary>
    ''' <param name="recordHeader">
    ''' Raw header bytes. The array must be long enough to cover every offset
    ''' constant; BitConverter throws otherwise.
    ''' </param>
    Public Sub New(ByVal recordHeader As Byte())
        _rawHeader = recordHeader

        ' Values are read in ascending offset order.
        _siteUrlLen = ReadUInt16(recordHeader, SITEURL_OFFSET)
        _webLen = ReadUInt16(recordHeader, WEB_OFFSET)
        _docLen = ReadUInt16(recordHeader, DOC_OFFSET)
        _sentBytes = BitConverter.ToUInt32(recordHeader, BYTESSENT_OFFSET)
        _statusCode = ReadUInt16(recordHeader, HTTPSTATUS_OFFSET)
        _userNameLen = ReadUInt16(recordHeader, USERNAME_OFFSET)
        _queryStringLen = ReadUInt16(recordHeader, QUERYSTRING_OFFSET)
        _referralLen = ReadUInt16(recordHeader, REFERRAL_OFFSET)
        _userAgentLen = ReadUInt16(recordHeader, USERAGENT_OFFSET)
        _commandLen = ReadUInt16(recordHeader, COMMAND_OFFSET)
    End Sub

    ''' <summary>
    ''' Total number of bytes of the record body: two GUID-sized fields, one
    ''' timestamp-sized field and the eight variable-length fields, each counted
    ''' with one extra byte.
    ''' </summary>
    ''' <returns>The record body length in bytes.</returns>
    Public Function RecordLength() As Integer
        ' The "+ 1" per field is presumably a terminator/separator byte - confirm against the format.
        Dim fixedPart As Integer = 2 * (STANDARD_GUID_LENGTH + 1) + (STANDARD_TIMESTAMP_LENGTH + 1)

        Dim variablePart As Integer = _siteUrlLen + _webLen + _docLen + _userNameLen + _
                                      _queryStringLen + _referralLen + _userAgentLen + _commandLen

        Return fixedPart + variablePart + VARIABLE_FIELD_COUNT
    End Function

    ' Reads an unsigned 16-bit value at the given position and widens it to Integer.
    Private Shared Function ReadUInt16(ByVal buffer As Byte(), ByVal position As Integer) As Integer
        Return BitConverter.ToUInt16(buffer, position)
    End Function
End Class

''' <summary>
''' One decoded log record: the text fields extracted from the record body plus
''' the HTTP status and byte count taken from the record header.
''' </summary>
Public Class LogRecord
    Private _siteId As String
    Private _time As String
    Private _siteAddress As String
    Private _webName As String
    Private _doc As String
    Private _user As String
    Private _query As String
    Private _referrer As String
    Private _agent As String
    Private _cmd As String
    Private _httpStatus As Integer
    Private _sentBytes As Long

    ''' <summary>First field of the record body (GUID-sized text).</summary>
    Public ReadOnly Property SiteGUID() As String
        Get
            Return _siteId
        End Get
    End Property

    ''' <summary>Timestamp text as stored in the record body.</summary>
    Public ReadOnly Property TimeStamp() As String
        Get
            Return _time
        End Get
    End Property

    ''' <summary>Site URL text.</summary>
    Public ReadOnly Property SiteUrl() As String
        Get
            Return _siteAddress
        End Get
    End Property

    ''' <summary>Web text.</summary>
    Public ReadOnly Property Web() As String
        Get
            Return _webName
        End Get
    End Property

    ''' <summary>Document text.</summary>
    Public ReadOnly Property Document() As String
        Get
            Return _doc
        End Get
    End Property

    ''' <summary>User name text.</summary>
    Public ReadOnly Property UserName() As String
        Get
            Return _user
        End Get
    End Property

    ''' <summary>Query string text.</summary>
    Public ReadOnly Property QueryString() As String
        Get
            Return _query
        End Get
    End Property

    ''' <summary>Referral text.</summary>
    Public ReadOnly Property Referral() As String
        Get
            Return _referrer
        End Get
    End Property

    ''' <summary>User agent text.</summary>
    Public ReadOnly Property UserAgent() As String
        Get
            Return _agent
        End Get
    End Property

    ''' <summary>Command text.</summary>
    Public ReadOnly Property Command() As String
        Get
            Return _cmd
        End Get
    End Property

    ''' <summary>HTTP status copied from the record header.</summary>
    Public ReadOnly Property Status() As Integer
        Get
            Return _httpStatus
        End Get
    End Property

    ''' <summary>Bytes-sent value copied from the record header.</summary>
    Public ReadOnly Property BytesSent() As Long
        Get
            Return _sentBytes
        End Get
    End Property

    ''' <summary>
    ''' Slices <paramref name="recordData"/> into its text fields using the field
    ''' lengths carried by <paramref name="headerData"/>.
    ''' </summary>
    ''' <param name="headerData">Decoded header belonging to this record.</param>
    ''' <param name="recordData">Raw bytes of the record body.</param>
    Public Sub New(ByRef headerData As LogRecordHeader, ByVal recordData As Byte())
        ' The original expression "UTF8Encoding.Default" resolves to the shared, inherited
        ' Encoding.Default property, so this is the platform default encoding and not
        ' necessarily UTF-8.
        Dim textEncoding As Encoding = Encoding.Default

        ' Fields are laid out back to back; each one is followed by a single byte that is skipped.
        Dim cursor As Integer = 0

        _siteId = TakeField(textEncoding, recordData, cursor, LogRecordHeader.STANDARD_GUID_LENGTH)
        _time = TakeField(textEncoding, recordData, cursor, LogRecordHeader.STANDARD_TIMESTAMP_LENGTH)
        _siteAddress = TakeField(textEncoding, recordData, cursor, headerData.SiteUrlLength)
        _webName = TakeField(textEncoding, recordData, cursor, headerData.WebLength)
        _doc = TakeField(textEncoding, recordData, cursor, headerData.DocLength)
        _user = TakeField(textEncoding, recordData, cursor, headerData.UserNameLength)
        _query = TakeField(textEncoding, recordData, cursor, headerData.QueryStringLength)
        _referrer = TakeField(textEncoding, recordData, cursor, headerData.ReferralLength)
        _agent = TakeField(textEncoding, recordData, cursor, headerData.UserAgentLength)

        ' NOTE(review): LogRecordHeader.RecordLength() counts a second GUID-sized field that
        ' is never skipped here. If that field sits between the user agent and the command,
        ' this offset is short by STANDARD_GUID_LENGTH + 1 - confirm against the log format.
        _cmd = TakeField(textEncoding, recordData, cursor, headerData.CommandLength)

        _httpStatus = headerData.HttpStatus
        _sentBytes = headerData.BytesSent
    End Sub

    ' Decodes fieldLength bytes starting at cursor, then moves cursor past the field
    ' and the single byte that follows it.
    Private Shared Function TakeField(ByVal textEncoding As Encoding, ByVal data As Byte(), _
                                      ByRef cursor As Integer, ByVal fieldLength As Integer) As String
        Dim text As String = textEncoding.GetString(data, cursor, fieldLength)
        cursor = cursor + fieldLength + 1
        Return text
    End Function

End Class