[tor-commits] [webstats/master] Add logimport.sh and logarchive.sh to git
runa at torproject.org
runa at torproject.org
Thu Jan 19 08:49:20 UTC 2012
commit acfa536cf24f253dfdfc1321e0033c878553110a
Author: Runa A. Sandvik <runa.sandvik at gmail.com>
Date: Thu Jan 19 08:49:04 2012 +0000
Add logimport.sh and logarchive.sh to git
---
logarchive.sh | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
logimport.sh | 47 ++++++++++++++++++++++++++++++++++++
2 files changed, 121 insertions(+), 0 deletions(-)
diff --git a/logarchive.sh b/logarchive.sh
new file mode 100644
index 0000000..bab17b5
--- /dev/null
+++ b/logarchive.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Archive the sanitized web logs: copy everything from the out/ directory
+# into the log archive, keep one tarball per host and month in the data
+# archive, and publish a bzip2-compressed copy of each tarball.
+
+# Define a few variables
+logs="/srv/webstats.torproject.org/home/webstats/out"
+archive="/srv/webstats.torproject.org/archive"
+log_archive="/srv/webstats.torproject.org/archive/weblogs"
+data_archive="/srv/webstats.torproject.org/archive/data"
+data_publish="/srv/webstats.torproject.org/htdocs/data"
+record="/srv/webstats.torproject.org/logarchive.log"
+
+# Define the hosts we have logs for
+hosts=(
+  metrics.torproject.org-access.log
+)
+
+# See if we actually have logs to process
+if [ ! "$(ls -A "$logs")" ]; then
+  echo "$(date) - No logs to process" >> "$record"
+  exit 1
+fi
+
+# rsync ALL the logs so that we can delete them from the out/ directory
+# later. Stop here if the copy fails: removing logs that never reached the
+# archive would lose them for good.
+if ! rsync -ar "$logs/" "$log_archive/"; then
+  echo "$(date) - rsync to $log_archive failed" >> "$record"
+  exit 1
+fi
+
+for host in "${hosts[@]}"; do
+  # Archived logs live in $log_archive/<year>/<month>/..., so with the paths
+  # defined above the year is the 6th and the month the 7th "/"-separated
+  # field of what find prints.
+  years=$(find "$log_archive/" -name "$host" | cut -d / -f 6 | sort -u)
+
+  for y in $years; do
+    months=$(find "$log_archive/$y/" -name "$host" | cut -d / -f 7 | sort -u)
+
+    for m in $months; do
+      tarball="$data_archive/$host-$y-$m.tar"
+
+      # Names already stored in this month's tarball (empty if there is no
+      # tarball yet). Listing once and comparing names avoids depending on
+      # tar's exit status for a missing member.
+      archived=""
+      if [ -e "$tarball" ]; then
+        archived=$(tar --list --file="$tarball")
+      fi
+
+      # Collect the logs that are not part of the tarball yet. The names are
+      # relative to $archive, so members are stored as weblogs/<year>/...
+      new_logs=()
+      while IFS= read -r log; do
+        if ! grep -Fxq -- "$log" <<< "$archived"; then
+          new_logs+=("$log")
+        fi
+      done < <(cd "$archive" && find "weblogs/$y/$m" -name "$host" | sort)
+
+      if [ "${#new_logs[@]}" -gt 0 ]; then
+        # Add to the tarball in the data archive itself, so that what it
+        # already holds is kept; start a new one only when none exists.
+        if [ -e "$tarball" ]; then
+          tar_mode="--append"
+        else
+          tar_mode="--create"
+        fi
+        tar "$tar_mode" --file="$tarball" --directory="$archive" \
+          -- "${new_logs[@]}"
+      fi
+
+      # Pack up the tarball and make it available online
+      if [ -e "$tarball" ]; then
+        bzip2 -kf1 "$tarball" && mv "$tarball.bz2" "$data_publish"
+      fi
+    done
+
+    # And remove logs from the out/ directory
+    rm -rf -- "${logs:?}/$y/"
+  done
+
+  # And report back
+  echo "$(date) - Logs processed for $host" >> "$record"
+done
diff --git a/logimport.sh b/logimport.sh
new file mode 100644
index 0000000..438e3dd
--- /dev/null
+++ b/logimport.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# The sanitized logs are in /srv/webstats.torproject.org/home/webstats/out/,
+# with the following format: year/month/day/$virtualhost-access.log
+#
+# Need to concat the logs in chronological order and import into
+# awstats and webalizer.
+
+# Define a few variables
+logtmp="/srv/webstats.torproject.org/logtmp"
+logs="/srv/webstats.torproject.org/home/webstats/out"
+record="/srv/webstats.torproject.org/logimport.log"
+
+# Define the hosts we have logs for
+hosts=(
+  metrics
+)
+
+# See if we actually have logs to process
+if [ ! "$(ls -A "$logs")" ]; then
+  echo "$(date) - No logs to process" >> "$record"
+  exit 1
+fi
+
+for host in "${hosts[@]}"; do
+  vhost="$host.torproject.org"
+
+  # Work in a scratch directory. -p tolerates a directory left behind by an
+  # interrupted run, and a failed cd must stop the script because everything
+  # below writes relative to the current directory.
+  mkdir -p "$logtmp"
+  if ! cd "$logtmp"; then
+    echo "$(date) - Cannot enter $logtmp" >> "$record"
+    exit 1
+  fi
+
+  # Concat the logs in chronological order. Sorting the paths orders them by
+  # year/month/day (assumes zero-padded directory names - TODO confirm).
+  find "$logs/" -name "$vhost-access.log" -print0 \
+    | sort -z \
+    | xargs -0 -r cat > "$vhost-access.log"
+
+  # Time to run the web log analysis tools
+  #
+  # Running awstats first
+  /srv/webstats.torproject.org/awstats/awstats.pl -config="$vhost" -update
+
+  # And then webalizer
+  webalizer -c "/srv/webstats.torproject.org/configs/webalizer.$vhost.conf"
+
+  # Cleanup
+  cd "/srv/webstats.torproject.org"
+  rm -rf -- "${logtmp:?}"
+
+  # And report back
+  echo "$(date) - Logs processed for $host" >> "$record"
+done
More information about the tor-commits
mailing list