[tor-commits] [webstats/master] Add logimport.sh and logarchive.sh to git

runa at torproject.org runa at torproject.org
Thu Jan 19 08:49:20 UTC 2012


commit acfa536cf24f253dfdfc1321e0033c878553110a
Author: Runa A. Sandvik <runa.sandvik at gmail.com>
Date:   Thu Jan 19 08:49:04 2012 +0000

    Add logimport.sh and logarchive.sh to git
---
 logarchive.sh |   74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 logimport.sh  |   47 ++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 0 deletions(-)

diff --git a/logarchive.sh b/logarchive.sh
new file mode 100644
index 0000000..bab17b5
--- /dev/null
+++ b/logarchive.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+#
+# Define a few variables
+logs="/srv/webstats.torproject.org/home/webstats/out"
+archive="/srv/webstats.torproject.org/archive"
+log_archive="/srv/webstats.torproject.org/archive/weblogs"
+data_archive="/srv/webstats.torproject.org/archive/data"
+data_publish="/srv/webstats.torproject.org/htdocs/data"
+record="/srv/webstats.torproject.org/logarchive.log"
+
+# Define the hosts we have logs for
+hosts=(
+    metrics.torproject.org-access.log
+)
+
+# See if we actually have logs to process
+if [ ! "$(ls -A $logs)" ];
+then
+    echo "`date` - No logs to process" >> $record
+    exit 1
+fi
+
+# rsync ALL the logs so that we can delete them from the out/ directory
+# later
+rsync -ar "$logs/" "$log_archive/"
+
+for host in "${hosts[@]}"
+do
+    year=`find "$log_archive/" -name $host | cut -d / -f 6 | sort | uniq`
+
+    for y in $year; do
+        month=`find "$log_archive/$y/" -name $host | cut -d / -f 7 | sort | uniq`
+        cd "$archive/"
+
+        for m in $month; do
+            all_the_logs=`find "weblogs/$y/$m" -name $host`
+            for i in $all_the_logs; do
+                # If we already have a tarball, check to see if the file is already a
+                # part of it
+                if [ -e "$data_archive/$host-$y-$m.tar" ]
+                then
+                    # See if the file is already a part of the tarball
+                    tar --list --file="$data_archive/$host-$y-$m.tar" $i
+                    check_exists=`echo $?`
+                    if [[ $check_exists -eq "1" ]]
+                    then
+                        tar --append --file="$host-$y-$m.tar" $i
+                    fi
+                else
+                        tar --append --file="$host-$y-$m.tar" $i
+                fi
+            done
+
+            # If we did create a new tarball, move it
+            if [ -e "$host-$y-$m.tar" ]
+            then
+                mv "$host-$y-$m.tar" "$data_archive"
+            fi
+
+            # Pack up the tarball and make it available online
+            cd "$data_archive"
+            bzip2 -kf1 "$host-$y-$m.tar"
+            mv "$host-$y-$m.tar.bz2" "$data_publish"
+
+        done
+
+        # And remove logs from the out/ directory
+        rm -rf "$logs/$y/"
+
+    done
+
+    # And report back
+    echo "`date` - Logs processed for $host" >> $record
+done
diff --git a/logimport.sh b/logimport.sh
new file mode 100644
index 0000000..438e3dd
--- /dev/null
+++ b/logimport.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+#
+# The sanitized logs are in /srv/webstats.torproject.org/home/webstats/out/,
+# with the following format: year/month/day/$virtualhost-access.log
+#
+# Need to concat the logs in chronological order and import into
+# awstats and webalizer.
+
+# Define a few variables
+logtmp="/srv/webstats.torproject.org/logtmp"
+logs="/srv/webstats.torproject.org/home/webstats/out"
+record="/srv/webstats.torproject.org/logimport.log"
+
+# Define the hosts we have logs for
+hosts=(
+    metrics
+)
+
+# See if we actually have logs to process
+if [[ -z "$(ls -A "$logs")" ]]
+then
+    echo "$(date) - No logs to process" >> "$record"
+    exit 1
+fi
+
+for host in "${hosts[@]}"
+do
+    # Concat the logs in chronological order.  -p tolerates a directory left
+    # behind by an interrupted run; stop if we cannot enter it, otherwise
+    # the combined log would be written to whatever the current directory is.
+    mkdir -p "$logtmp"
+    if ! cd "$logtmp"
+    then
+        echo "$(date) - Cannot enter $logtmp" >> "$record"
+        exit 1
+    fi
+
+    # NUL-delimited names keep unusual paths intact, and -r skips cat when
+    # nothing matches; cat now runs on batches of files, not once per file.
+    find "$logs/" -name "$host.torproject.org-access.log" -print0 \
+        | sort -z \
+        | xargs -0 -r cat > "$host.torproject.org-access.log"
+
+    # Time to run the web log analysis tools
+    #
+    # Running awstats first
+    /srv/webstats.torproject.org/awstats/awstats.pl -config="$host.torproject.org" -update
+
+    # And then webalizer
+    webalizer -c "/srv/webstats.torproject.org/configs/webalizer.$host.torproject.org.conf"
+
+    # Cleanup
+    cd "/srv/webstats.torproject.org"
+    rm -rf "${logtmp:?}"
+
+    # And report back
+    echo "$(date) - Logs processed for $host" >> "$record"
+done



More information about the tor-commits mailing list