-
Notifications
You must be signed in to change notification settings - Fork 0
/
74-searchinfo
44 lines (36 loc) · 1.26 KB
/
74-searchinfo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/bin/bash
# searchinfo - Extracts and analyzes search engine traffic indicated in the
#   referrer field of a Common Log Format access log.
#
# Usage: searchinfo logfile
#
# Reads whitespace-separated field 11 of every line in logfile, keeps the
# values that carry a query string and do not point at $host, and prints
# the $maxmatches most frequent "engine: search terms" pairs followed by a
# summary line. Exits 1 on a usage error, an unreadable log file, or when
# the temporary file cannot be created; exits 0 otherwise.

host="intuitive.com"    # change to your domain, as desired
maxmatches=20
count=0

if [ $# -eq 0 ] ; then
  echo "Usage: ${0##*/} logfile" >&2
  exit 1
fi

if [ ! -r "$1" ] ; then
  echo "Error: can't open file $1 for analysis." >&2
  exit 1
fi

# mktemp gives an unpredictable name and creates the file immediately, so
# the final sort still has a (possibly empty) file to read when nothing
# in the log matches.
temp="$(mktemp "${TMPDIR:-/tmp}/${0##*/}.XXXXXX")" || {
  echo "Error: can't create temporary file." >&2
  exit 1
}
trap 'rm -f -- "$temp"' EXIT

# Escape the dots so the domain is matched literally by grep -E rather
# than "." matching any character.
host_re="${host//./\\.}"

# Read one URL per line instead of word-splitting a command substitution:
# every URL of interest contains "?", which the shell would otherwise
# treat as a glob character. Process substitution keeps the loop in the
# current shell, so $count survives past "done".
while IFS= read -r url ; do
  # Last two dot-separated labels of the host part, e.g. "google.com".
  searchengine="$(printf '%s\n' "$url" | cut -d/ -f3 | rev | cut -d. -f1-2 | rev)"

  # Text between the first and second "?" of the URL.
  query="$(printf '%s\n' "$url" | cut -d\? -f2)"

  # Values of the well-known search parameters, one per line, with "+"
  # and "%20" turned into spaces and double quotes removed.
  args="$(printf '%s\n' "$query" | tr '&' '\n' | \
    grep -E '(^q=|^sid=|^p=|query=|item=|ask=|name=|topic=)' | \
    sed -e 's/+/ /g' -e 's/%20/ /g' -e 's/"//g' | cut -d= -f2)"

  if [ -n "$args" ] ; then
    printf '%s: %s\n' "$searchengine" "$args" >> "$temp"
  else
    # No well-known match, show entire GET string instead...
    printf '%s %s\n' "$searchengine" "$query" >> "$temp"
  fi

  count=$(( count + 1 ))
done < <(awk '{ if (length($11) > 4) { print $11 } }' "$1" | \
  grep -vE "(/www\.${host_re}|/${host_re})" | grep -F '?')

echo "Search engine referrer info extracted from ${1}:"

sort "$temp" | uniq -c | sort -rn | head -n "$maxmatches" | sed 's/^/ /'

echo ""

# Arithmetic expansion strips any padding some wc implementations emit.
total=$(( $(wc -l < "$1") ))
echo "Scanned $count entries in log file out of $total total."

exit 0