#!/usr/bin/perl

use strict;

use CGI::Carp qw(fatalsToBrowser);
use Time::Local;
use Time::HiRes qw(time);
#use HTTP::UA::Parser;
use HTTP::BrowserDetect();

##################
# Initialization #
##################

# Values taken from the CGI environment.
my $gScriptName = $ENV{'SCRIPT_NAME'};
my $gDocumentRoot = $ENV{'DOCUMENT_ROOT'};

# File names of the HTML template, the page log and the settings store.
# They carry no directory part, so they are opened relative to the
# current working directory.
my $gFileNameTemplateHtml = 'showlog.html';
my $gFileNameLogDb = 'pagelog.db';
my $gSettingsDb = 'settings.db';

# Entries with a time greater than $gLastShowTime are counted as "new".
# ReadSettings overwrites this default when the settings file contains a
# LastShowTime entry.
my $gLastShowTime = 9999999999;
# time() is the Time::HiRes version imported above, so both values may
# carry fractional seconds. $gOutputGeneratedTime is stored by SaveSettings,
# $gStartTimeHiRes is used to report how long the page generation took.
my $gOutputGeneratedTime = time();
my $gStartTimeHiRes = time;

# All log entries, one string per line; filled by LoadEntries.
my @gLines;

# Strings that IsBot searches for (case-insensitively) in a user agent.
# The commented-out names all contain "bot" and therefore already match
# the generic 'bot' entry.
my @gBotStrings = (
#	'AhrefsBot',
#	'Applebot',
#	'bingbot',
	'Dataprovider',
#	'Googlebot',
#	'YandexBot',
	'bot',
	'crawler',
	'spider'
);

# HTML snippet used to flag bots in the generated output.
my $gBotMarker = '<span style="background-color: yellow;">&nbsp;BOT&nbsp;</span>';

# Statistical values, collected by CreateStatistics in a first pass:
my $gTotalHits = 0;
my %HitsPerTarget;
my %HitsPerReferrer;
my %HitsPerLanguage;
my %HitsPerBrowser;
my %HitsPerOS;
my %HitsPerDevice;

# The same counters, restricted to entries newer than $gLastShowTime.
my $gTotalHitsNew = 0;
my %HitsPerTargetNew;
my %HitsPerReferrerNew;
my %HitsPerLanguageNew;
my %HitsPerBrowserNew;
my %HitsPerOSNew;
my %HitsPerDeviceNew;

################
# Main program #
################

# Processing pipeline:
#  1. send the HTTP header,
#  2. read the stored settings (time of the previous view),
#  3. load the log lines and extend each with line number, browser, OS and device,
#  4. group the lines by target,
#  5. count the hits (first pass),
#  6. render the HTML page from the template (second pass),
#  7. store the generation time of this page in the settings file.
PrintHttpHeader();
ReadSettings();
LoadEntries();
EnumerateEntries();
SortEntries();
CreateStatistics();
PrintEntries();
SaveSettings();
exit 0;

#############
# Functions #
#############

#
# Writes the HTTP response header to STDOUT: two no-cache directives and the
# UTF-8 HTML content type, followed by the empty line that ends the header.
#
sub PrintHttpHeader
{
	my @headerFields = (
		'Cache-Control: no-cache',
		'Pragma: no-cache',
		'Content-type: text/html; charset=utf-8',
	);

	# Every field ends with a newline; one extra newline closes the header.
	print join("\n", @headerFields) . "\n\n";
}

#
# Reads the settings from the settings DB file.
#
# The file consists of "Name=Value" lines. Only "LastShowTime" is evaluated;
# its value replaces $gLastShowTime. A file that cannot be opened is not an
# error: the defaults simply stay in effect.
#
sub ReadSettings
{
	# Three-argument open with a lexical handle: the file name can never be
	# misread as an open mode and the handle does not leak into the package.
	open(my $fhSettings, '<', $gSettingsDb) or return;

	while (my $Line = <$fhSettings>)
	{
		chomp $Line;

		my ($name, $value) = split(/=/, $Line, 2);
		next unless (defined $name && $name eq 'LastShowTime');

		# Accept plain (optionally fractional) numbers only, so that an empty
		# or damaged entry cannot replace the default with garbage.
		if (defined $value && $value =~ /\A\d+(?:\.\d+)?\z/)
		{
			$gLastShowTime = $value;
		}
	}

	close($fhSettings);
}

#
# Writes the settings to the settings DB file.
#
# Stores the generation time of the current page as "LastShowTime", replacing
# the previous file content. Saving stays best effort: a failure is reported
# with warn() but does not abort the script.
#
sub SaveSettings
{
	my $fhSettings;
	unless (open($fhSettings, '>', $gSettingsDb))
	{
		warn "Cannot write settings file $gSettingsDb: $!";
		return;
	}

	print {$fhSettings} "LastShowTime=$gOutputGeneratedTime\n";

	# Buffered write errors only become visible when the handle is closed.
	close($fhSettings) or warn "Cannot close settings file $gSettingsDb: $!";
}

#
# Reads all lines of the log DB file into the string array @gLines.
# Dies if the file cannot be opened; the message includes the system error,
# because the cause is not necessarily a missing file.
#
sub LoadEntries
{
	# Three-argument open with a lexical handle instead of a bareword handle.
	open(my $fhLog, '<', $gFileNameLogDb) or die "File not found: $gFileNameLogDb ($!)";
	@gLines = <$fhLog>;
	close($fhLog);
}

#
# Rewrites every line of @gLines in place:
# - a leading column with a running line number is added, which equals the
#   line number in the DB file,
# - empty target, referrer and language fields are replaced by '-',
# - browser, OS and device names derived from the user agent are appended.
#
# Input line:  time|target|referrer|language|winsize|userAgent
# Output line: lineNo|time|target|referrer|language|winsize|userAgent|browser|os|device
#
sub EnumerateEntries
{
	my $lineNo = 1;
	for my $Line (@gLines)
	{
		chomp $Line;

		my ($time, $target, $referrer, $language, $winsize, $userAgent) = split(/\|/, $Line);

		$target = '-' if (!defined $target || $target eq '');
		$referrer = '-' if (!defined $referrer || $referrer eq '');
		$language = '-' if (!defined $language || $language eq '');

		# split() drops trailing empty fields, so an entry without a user agent
		# yields undef here. HTTP::BrowserDetect->new() would then fall back to
		# $ENV{HTTP_USER_AGENT}, i.e. to the browser of whoever is viewing this
		# page, and count it for the log entry. Pass an empty string instead.
		$userAgent = '' unless (defined $userAgent);

		# HTTP::UA::Parser is very slow, therefore HTTP::BrowserDetect is used.
		my $uaParser = HTTP::BrowserDetect->new($userAgent);
		my $browser = $uaParser->browser_string() || 'Other';
		my $os = $uaParser->os_string() || 'Other';

		# Without a device name, fall back to the robot name (flagged with the
		# bot marker) and finally to 'Other'.
		my $device = $uaParser->device_string();
		if (!$device)
		{
			my $robot = $uaParser->robot_string();
			$device = $robot ? $robot . $gBotMarker : 'Other';
		}

		$Line = "$lineNo|$time|$target|$referrer|$language|$winsize|$userAgent|$browser|$os|$device\n";
		$lineNo++;
	}
}

#
# Re-orders @gLines so that all entries belonging to one target are grouped
# together. The groups are ordered by the time of the last entry of each
# target in the list, so that groups with the newest entry appear further up.
# Within a group the entries are ordered newest first.
#
# Every line is split only once and every group is sorted on its own; the
# complete list is no longer re-sorted once per target. Targets whose last
# entries have the same time are ordered by name to keep the output stable.
#
sub SortEntries
{
	my %lastTimeOfTarget;	# target => time of the last line seen for it
	my %entriesOfTarget;	# target => [time, line] pairs in input order

	for my $Line (@gLines)
	{
		my ($time, $target) = (split(/\|/, $Line))[1, 2];
		$lastTimeOfTarget{$target} = $time;
		push @{ $entriesOfTarget{$target} }, [ $time, $Line ];
	}

	my @LinesNew;
	for my $target (sort { $lastTimeOfTarget{$b} <=> $lastTimeOfTarget{$a} || $a cmp $b } keys %lastTimeOfTarget)
	{
		push @LinesNew,
			map { $_->[1] }
			sort { $b->[0] <=> $a->[0] }
			@{ $entriesOfTarget{$target} };
	}

	@gLines = @LinesNew;
}

#
# Compares two enumerated log lines ("lineNo|time|...") by their time field.
#
# Parameters: the two lines to compare.
# Returns:    -1, 0 or 1 in the manner of <=> (ascending by time).
#
sub CompareEntries
{
	# Do not name these $a/$b: lexical copies would shadow the variables that
	# sort() uses and break any sort block written inside this function.
	my ($lineLeft, $lineRight) = @_;

	my $timeLeft = (split(/\|/, $lineLeft))[1];
	my $timeRight = (split(/\|/, $lineRight))[1];

	return $timeLeft <=> $timeRight;
}

#
# First pass over @gLines: collects the hit counters that the second pass
# (the HTML output) refers to later.
#
# Every line counts as one hit. For target, referrer, language, browser, OS
# and device the per-value counters are incremented when the field holds a
# true value. Lines newer than $gLastShowTime additionally feed the "New"
# counters. The lines themselves are chomped in place.
#
sub CreateStatistics
{
	# One [total, new] counter pair per evaluated field, in field order.
	my @counterPairs = (
		[ \%HitsPerTarget,   \%HitsPerTargetNew ],
		[ \%HitsPerReferrer, \%HitsPerReferrerNew ],
		[ \%HitsPerLanguage, \%HitsPerLanguageNew ],
		[ \%HitsPerBrowser,  \%HitsPerBrowserNew ],
		[ \%HitsPerOS,       \%HitsPerOSNew ],
		[ \%HitsPerDevice,   \%HitsPerDeviceNew ],
	);

	for my $entry (@gLines)
	{
		chomp $entry;

		# Columns: lineNo, time, target, referrer, language, winsize,
		# userAgent, browser, os, device.
		my @columns = split(/\|/, $entry);
		my $entryTime = $columns[1];
		my @countedValues = @columns[2, 3, 4, 7, 8, 9];

		my $isNewEntry = ($entryTime > $gLastShowTime);

		$gTotalHits++;
		$gTotalHitsNew++ if ($isNewEntry);

		for my $index (0 .. $#counterPairs)
		{
			my $value = $countedValues[$index];
			next unless ($value);

			my ($totalCounter, $newCounter) = @{ $counterPairs[$index] };
			$totalCounter->{$value}++;
			$newCounter->{$value}++ if ($isNewEntry);
		}
	}
}

#
# Generates the HTML output from the HTML template and prints it.
#
# The template file contains tagged sections (e.g. <template-main-data> ...
# </template-main-data>). Each section is cut out of the template, in most
# cases leaving a placeholder behind, filled with data and inserted again:
# - one statistics table per evaluated field (via PrintStatistics),
# - the main table with one row per target, each followed by a details table
#   listing the single hits of that target,
# - the page header with the overall counters.
#
# Dies if the template file cannot be opened.
#
sub PrintEntries
{
	# Read the HTML template. Three-argument open with a lexical handle; the
	# system error is appended because the cause need not be a missing file.
	open(my $fhTemplate, '<', $gFileNameTemplateHtml) or die "File not found: $gFileNameTemplateHtml ($!)";
	my @TemplateLines = <$fhTemplate>;
	close($fhTemplate);

	# HTML templates
	# - page header
	my $strTemplatePageHeader = ExtractAndReplaceLines(\@TemplateLines, '<page-header>', '</page-header>', '@PAGE_HEADER@');
	# - statistics table
	my $strTemplateStatData = ExtractAndReplaceLines(\@TemplateLines, '<template-stat-data>', '</template-stat-data>', '@STAT_DATA@');
	my $strTemplateStatTable = ExtractAndReplaceLines(\@TemplateLines, '<template-stat-table>', '</template-stat-table>', '@STAT_TABLE@');
	# - main table
	my $strTemplateMainHeader = ExtractAndReplaceLines(\@TemplateLines, '<template-main-header>', '</template-main-header>', '@TABLE@');
	my $strTemplateMainData = ExtractAndReplaceLines(\@TemplateLines, '<template-main-data>', '</template-main-data>', '');
	# - details table
	my $strTemplateDetailsHeader = ExtractAndReplaceLines(\@TemplateLines, '<template-details-header>', '</template-details-header>', '@DETAILS@');
	my $strTemplateDetailsData = ExtractAndReplaceLines(\@TemplateLines, '<template-details-data>', '</template-details-data>', '');
	my $strTemplateDetails = ExtractAndReplaceLines(\@TemplateLines, '<template-details>', '</template-details>', '');

	# HTML output: statistics tables. Each call appends one table to
	# $tableStat and counts the values that have new hits only.
	my $tableStat = '';

	my $nNewTargets = 0;
	PrintStatistics('Target URL', \%HitsPerTarget, \%HitsPerTargetNew, \$nNewTargets, \$strTemplateStatData, \$strTemplateStatTable, \$tableStat);

	my $nNewReferrers = 0;
	PrintStatistics('Referrer URL', \%HitsPerReferrer, \%HitsPerReferrerNew, \$nNewReferrers, \$strTemplateStatData, \$strTemplateStatTable, \$tableStat);

	my $nNewLanguages = 0;
	PrintStatistics('Language', \%HitsPerLanguage, \%HitsPerLanguageNew, \$nNewLanguages, \$strTemplateStatData, \$strTemplateStatTable, \$tableStat);

	my $nNewBrowsers = 0;
	PrintStatistics('Browser', \%HitsPerBrowser, \%HitsPerBrowserNew, \$nNewBrowsers, \$strTemplateStatData, \$strTemplateStatTable, \$tableStat);

	my $nNewOSes = 0;
	PrintStatistics('Operating System', \%HitsPerOS, \%HitsPerOSNew, \$nNewOSes, \$strTemplateStatData, \$strTemplateStatTable, \$tableStat);

	my $nNewDevices = 0;
	PrintStatistics('Device', \%HitsPerDevice, \%HitsPerDeviceNew, \$nNewDevices, \$strTemplateStatData, \$strTemplateStatTable, \$tableStat);

	@TemplateLines = ReplaceLines(\@TemplateLines, '@STAT_TABLE@', $tableStat);

	# HTML output: main table
	my $lastTarget = '';
	my $table = '';
	my $tableDetails = '';

	$table .= $strTemplateMainHeader;

	for my $Line (@gLines)
	{
		chomp $Line;

		my ($lineNo, $time, $target, $referrer, $language, $winsize, $userAgent) = split(/\|/, $Line);

		# @gLines is grouped by target (see SortEntries), so a change of the
		# target marks the start of a new group.
		if ($target ne $lastTarget)
		{
			$lastTarget = $target;

			# Finish the details table of the previous group first.
			if ($tableDetails ne '')
			{
				my $html = $strTemplateDetails;
				$html = ReplaceLine($html, '@DETAILS@', $tableDetails);
				$table .= $html;

				$tableDetails = '';
				$table .= $strTemplateMainHeader;
			}

			my $hitsPerTarget = $HitsPerTarget{$target};
			my $hitsPerTargetNew = $HitsPerTargetNew{$target};
			# A target is "new" when all of its hits are new.
			my $bIsNew = ($hitsPerTarget == $hitsPerTargetNew);

			# Main row of the group; $time belongs to the first line of the
			# group here.
			my $html = $strTemplateMainData;
			$html = ReplaceLine($html, '@HITS@', $hitsPerTarget || '-');
			$html = ReplaceLine($html, '@NEW@', $hitsPerTargetNew || '-');
			$html = ReplaceLine($html, '@DAYS@', int(($gOutputGeneratedTime - $time) / 86400) || '-');
			$html = ReplaceLine($html, '@TARGET@', TextToHtml($target));
			$html = ReplaceLine($html, '@CLASS_NEW@', $bIsNew ? 'new' : '');
			$table .= $html;

			$tableDetails .= $strTemplateDetailsHeader;
		}

		# Details row for the single hit.
		my $html = $strTemplateDetailsData;
		$html = ReplaceLine($html, '@TIME@', FormatDateTime($time));
		$html = ReplaceLine($html, '@REFERRER@', TextToHtml($referrer));
		$html = ReplaceLine($html, '@LANGUAGE@', TextToHtml($language));
		$html = ReplaceLine($html, '@WINSIZE@', TextToHtml($winsize));
		$html = ReplaceLine($html, '@USERAGENT@', (IsBot($userAgent) ? $gBotMarker : '') . TextToHtml($userAgent));
		$html = ReplaceLine($html, '@CLASS_NEW@', $time > $gLastShowTime ? 'new' : '');
		$tableDetails .= $html;
	}

	# Details table of the last group.
	if ($tableDetails ne '')
	{
		my $html = $strTemplateDetails;
		$html = ReplaceLine($html, '@DETAILS@', $tableDetails);
		$table .= $html;
	}

	$strTemplatePageHeader = ReplaceLine($strTemplatePageHeader, '@TOTAL_HITS@', $gTotalHits || 0);
	$strTemplatePageHeader = ReplaceLine($strTemplatePageHeader, '@NEW_HITS@', $gTotalHitsNew);
	$strTemplatePageHeader = ReplaceLine($strTemplatePageHeader, '@NEW_TARGETS@', $nNewTargets);
	$strTemplatePageHeader = ReplaceLine($strTemplatePageHeader, '@NEW_REFERRERS@', $nNewReferrers);

	@TemplateLines = ReplaceLines(\@TemplateLines, '@PAGE_HEADER@', $strTemplatePageHeader);
	@TemplateLines = ReplaceLines(\@TemplateLines, '@TIME_LASTSHOW@', FormatDateTime($gLastShowTime));
	@TemplateLines = ReplaceLines(\@TemplateLines, '@TIME_GENERATED@', FormatDateTime($gOutputGeneratedTime));
	@TemplateLines = ReplaceLines(\@TemplateLines, '@SPAN_GENERATED@', time - $gStartTimeHiRes);
	@TemplateLines = ReplaceLines(\@TemplateLines, '@TABLE@', $table);
	print @TemplateLines;
}

#
# Renders one statistics table and appends it to the collected output.
#
# Parameters:
#   $strTitle         - name of the evaluated field, used for title and header
#   $refHashHits      - ref to hash: value => number of hits
#   $refHashHitsNew   - ref to hash: value => number of new hits
#   $refNewCount      - ref to scalar; incremented for every value whose hits
#                       are all new
#   $refTemplateData  - ref to the HTML template of one data row
#   $refTemplateTable - ref to the HTML template of the table frame
#   $refTable         - ref to the string the finished table is appended to
#
# Rows are ordered by hits (descending), then by new hits (descending), then
# by name.
#
sub PrintStatistics
{
	my $strTitle = shift;
	my $refHashHits = shift;
	my $refHashHitsNew = shift;
	my $refNewCount = shift;
	my $refTemplateData = shift;
	my $refTemplateTable = shift;
	my $refTable = shift;

	my $data = '';
	my $nMarkerLength = length($gBotMarker);

	for my $key (sort { ${$refHashHits}{$b} <=> ${$refHashHits}{$a} || ${$refHashHitsNew}{$b} <=> ${$refHashHitsNew}{$a} || $a cmp $b } keys %{$refHashHits})
	{
		my $hits = ${$refHashHits}{$key};
		my $hitsNew = ${$refHashHitsNew}{$key};
		my $bIsNew = $hits == $hitsNew;

		# The keys come from the log (target, referrer, ...) and must not reach
		# the page unescaped. Device names may end with the bot marker that
		# EnumerateEntries appends; that trusted HTML suffix is kept verbatim
		# and only the text in front of it is escaped.
		my $strName = $key;
		my $strMarker = '';
		if ($nMarkerLength > 0 && length($strName) >= $nMarkerLength && substr($strName, -$nMarkerLength) eq $gBotMarker)
		{
			$strMarker = $gBotMarker;
			$strName = substr($strName, 0, length($strName) - $nMarkerLength);
		}

		my $html = ${$refTemplateData};
		$html = ReplaceLine($html, '@DATA_NAME@', TextToHtml($strName) . $strMarker);
		$html = ReplaceLine($html, '@DATA_VALUE@', $hits || '-');
		$html = ReplaceLine($html, '@DATA_VALUE_NEW@', $hitsNew || '-');
		$html = ReplaceLine($html, '@CLASS_NEW@', $bIsNew ? 'new' : '');
		$data .= $html;

		${$refNewCount}++ if ($bIsNew);
	}

	my $html = ${$refTemplateTable};
	$html = ReplaceLine($html, '@TITLE@', 'Hits per ' . $strTitle);
	$html = ReplaceLine($html, '@HEADER_NAME@', $strTitle);
	$html = ReplaceLine($html, '@HEADER_VALUE@', 'Hits');
	$html = ReplaceLine($html, '@HEADER_VALUE_NEW@', 'New');
	$html = ReplaceLine($html, '@FOOTER_VALUE@', $gTotalHits);
	$html = ReplaceLine($html, '@FOOTER_VALUE_NEW@', $gTotalHitsNew);
	$html = ReplaceLine($html, '@STAT_DATA@', $data);
	${$refTable} .= $html;
}

####################
# Helper functions #
####################

#
# Replaces every occurrence of a search string in a list of lines.
#
# Parameters:
#   $refLines   - ref to the array of lines (not modified)
#   $strSearch  - text to search for; taken literally, not as a regex
#   $strReplace - replacement text
# Returns: the list of modified lines.
#
sub ReplaceLines
{
	my $refLines = shift;
	my $strSearch = shift;
	my $strReplace = shift;

	my @ReturnLines = @{$refLines};

	# An empty pattern would make Perl re-use the last successful pattern
	# instead of matching nothing, so there is nothing sensible to replace.
	return @ReturnLines if (!defined $strSearch || $strSearch eq '');

	for my $Zeile (@ReturnLines)
	{
		# \Q...\E: regex metacharacters in the search text match themselves.
		$Zeile =~ s/\Q$strSearch\E/$strReplace/g;
	}

	return @ReturnLines;
}

#
# Replaces every occurrence of a search string in a single string.
#
# Parameters:
#   $strZeile   - the text to work on (not modified)
#   $strSearch  - text to search for; taken literally, not as a regex
#   $strReplace - replacement text
# Returns: the modified copy of the text.
#
sub ReplaceLine
{
	my $strZeile = shift;
	my $strSearch = shift;
	my $strReplace = shift;

	my $ReturnZeile = $strZeile;

	# An empty pattern would make Perl re-use the last successful pattern
	# instead of matching nothing, so there is nothing sensible to replace.
	return $ReturnZeile if (!defined $strSearch || $strSearch eq '');

	# \Q...\E: regex metacharacters in the search text match themselves.
	$ReturnZeile =~ s/\Q$strSearch\E/$strReplace/g;

	return $ReturnZeile;
}

#
# Cuts tagged sections out of a list of lines.
#
# A section starts with a line matching $strStart and ends with the next line
# matching $strEnd (both used as regex patterns). In the list, every section
# including its start and end line is replaced by the single element
# $strReplace. The list behind $refLines is modified in place.
#
# Returns: the concatenated inner lines of the first section only; the content
# of further sections is discarded. Undefined if no section had inner lines.
#
sub ExtractAndReplaceLines
{
	my ($refLines, $strStart, $strEnd, $strReplace) = @_;

	my @keptLines;
	my $extracted;

	my $insideSection = 0;
	my $sectionCount = 0;

	for my $line (@{$refLines})
	{
		if (!$insideSection)
		{
			if ($line =~ /$strStart/)
			{
				# The start line itself is swallowed; the placeholder takes
				# the place of the whole section.
				$insideSection = 1;
				$sectionCount++;
				push @keptLines, $strReplace;
			}
			else
			{
				push @keptLines, $line;
			}
			next;
		}

		if ($line =~ /$strEnd/)
		{
			$insideSection = 0;
			next;
		}

		$extracted .= $line if ($sectionCount == 1);
	}

	@{$refLines} = @keptLines;

	return $extracted;
}

#
# Converts plain text to HTML: escapes & " < > and the pipe character (the
# field separator of the DB files) and turns line breaks into <br>.
#
# Parameter: the plain text.
# Returns:   the escaped text.
#
sub TextToHtml
{
	my $text = shift;

	my %entityOf = (
		'&' => '&amp;',
		'"' => '&quot;',
		'<' => '&lt;',
		'>' => '&gt;',
		'|' => '&#124;',
	);

	# Escape all special characters in a single pass, so that the ampersands
	# of the inserted entities are not escaped again.
	$text =~ s/([&"<>|])/$entityOf{$1}/g;

	# Line breaks come last; otherwise the inserted tags would be escaped.
	# "\r\n" is listed first so that it yields one <br> only.
	$text =~ s/\r\n|\r|\n/<br>/g;

	return $text;
}

#
# Formats a time given in epoch seconds as local date and time.
#
# Parameter: seconds since the epoch.
# Returns:   "DD.MM.YYYY&nbsp;HH:MM:SS"
#
sub FormatDateTime
{
	my $nSeconds = shift;

	# localtime() list: 0 = second, 1 = minute, 2 = hour, 3 = day of month,
	# 4 = month (0-based), 5 = year - 1900.
	my @parts = localtime($nSeconds);

	return sprintf(
		'%02d.%02d.%04d&nbsp;%02d:%02d:%02d',
		$parts[3], $parts[4] + 1, $parts[5] + 1900,
		$parts[2], $parts[1], $parts[0]
	);
}

#
# Formats a time span given in seconds.
#
# Parameter: the number of seconds.
# Returns:   "HH:MM:SS"; the hours are not limited to two digits.
#
sub FormatTimeSpan
{
	my $nSeconds = shift;

	my $hours = int($nSeconds / 3600);
	my $remainder = $nSeconds - $hours * 3600;

	my $minutes = int($remainder / 60);
	my $seconds = $remainder - $minutes * 60;

	return sprintf("%02d:%02d:%02d", $hours, $minutes, $seconds);
}

#
# Checks whether at least one of the given user-agent strings contains one of
# the strings listed in @gBotStrings (case-insensitive).
#
# Parameters: any number of user-agent strings.
# Returns:    1 if a bot string was found, 0 otherwise.
#
sub IsBot
{
	for my $userAgent (@_)
	{
		for my $needle (@gBotStrings)
		{
			# The first hit settles the result.
			return 1 if ($userAgent =~ m/$needle/i);
		}
	}

	return 0;
}

#######
# End #
#######
