Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
jarelllama authored Mar 31, 2024
1 parent d542ac3 commit ef64db0
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 29 deletions.
2 changes: 1 addition & 1 deletion functions/build_lists.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ format_file() {
bash functions/tools.sh format "$1"
}

# Build list functions are to specify the syntax of the lists.
# Build list functions are to specify the syntax of the lists for the build function.
# $syntax: name of list syntax
# $directory: directory to create list in
# $comment: character used for comments (blank defaults to #)
Expand Down
9 changes: 5 additions & 4 deletions functions/check_dead.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ main() {
check_alive
update_light_file

# Cache dead domains (skip processing dead domains through alive check)
# Cache dead domains (done last so the newly found dead domains skip the alive check)
cat dead_in_raw.tmp >> "$DEAD_DOMAINS"
format_file "$DEAD_DOMAINS"
}
Expand All @@ -53,8 +53,8 @@ check_subdomains() {

# Remove dead root domains from raw file and root domains file
comm -23 "$RAW" <(printf "%s" "$dead_root_domains") > raw.tmp
mv raw.tmp "$RAW"
comm -23 "$ROOT_DOMAINS" <(printf "%s" "$dead_root_domains") > root.tmp
mv raw.tmp "$RAW"
mv root.tmp "$ROOT_DOMAINS"

log_event "$dead_root_domains" dead raw
Expand Down Expand Up @@ -86,8 +86,8 @@ check_redundant() {

# Remove unused wildcards from raw file and wildcards file
comm -23 "$RAW" collated_dead_wildcards.tmp > raw.tmp
mv raw.tmp "$RAW"
comm -23 "$WILDCARDS" collated_dead_wildcards.tmp > wildcards.tmp
mv raw.tmp "$RAW"
mv wildcards.tmp "$WILDCARDS"

log_event "$(<collated_dead_wildcards.tmp)" dead wildcard
Expand Down Expand Up @@ -127,7 +127,8 @@ check_alive() {
alive_domains="$(printf "%s" "$alive_domains" | sed "s/^${subdomain}\.//" | sort -u)"
done < "$SUBDOMAINS_TO_REMOVE"

printf "%s\n" "$alive_domains" >> "$RAW" # Add resurrected domains to raw file
# Add resurrected domains to raw file
printf "%s\n" "$alive_domains" >> "$RAW"
format_file "$RAW"

log_event "$alive_domains" resurrected dead_domains_file
Expand Down
19 changes: 8 additions & 11 deletions functions/check_parked.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,6 @@ main() {
}

remove_parked_domains() {
# Reset split files before run
find . -maxdepth 1 -type f -name "x??" -delete

printf "\n[start] Analyzing %s entries for parked domains\n" "$(wc -l < "$RAW")"

# Retrieve parked domains and return if none found
retrieve_parked "$RAW" || return

Expand All @@ -38,11 +33,6 @@ remove_parked_domains() {
}

add_unparked_domains() {
# Reset split files before run
find . -maxdepth 1 -type f -name "x??" -delete

printf "\n[start] Analyzing %s entries for unparked domains\n" "$(wc -l < "$PARKED_DOMAINS")"

# Retrieve parked domains and return if none found
retrieve_parked "$PARKED_DOMAINS" || return

Expand All @@ -66,7 +56,13 @@ add_unparked_domains() {
# Output:
# exit status 1 if no parked domains were found
retrieve_parked() {
: > parked_domains.tmp # Truncate parked domains (prevents missing file error)
# Truncate parked domains (prevents missing file error)
: > parked_domains.tmp
# Remove leftover split files (x??) before run
find . -maxdepth 1 -type f -name "x??" -delete

printf "\n[info] Processing file %s\n
[start] Analyzing %s entries for parked domains\n" "$1" "$(wc -l < "$1")"

# Split file into 12 equal files
split -d -l $(($(wc -l < "$1")/12)) "$1"
Expand Down Expand Up @@ -160,4 +156,5 @@ cleanup() {
}

# Register 'cleanup' to run whenever the script exits (on any exit path),
# then start the script through its 'main' entry point.
trap cleanup EXIT

main
1 change: 1 addition & 0 deletions functions/tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Tools.sh is a shell wrapper intended to store commonly used functions.

# Function 'format' is called to standardize the format of a file.
# $1: file to be formatted
format() {
local -r file="$1"
[[ ! -f "$file" ]] && return
Expand Down
20 changes: 10 additions & 10 deletions functions/update_readme.sh
Original file line number Diff line number Diff line change
Expand Up @@ -134,28 +134,28 @@ Thanks to the following people for the help, inspiration, and support!
EOF
}

# Function 'print_stats' prints the various statistics for each source
# $1: source to process (leave blank to process all sources).
# Function 'print_stats' is an echo wrapper that returns the statistics for each source.
# $1: source to process (default is all sources)
print_stats() {
    # Prints one formatted statistics row to stdout.
    # $1: source to process; empty or missing means all sources.
    #
    # The label is kept local so calling this function does not overwrite a
    # variable named 'source' in the caller's scope.
    local source="${1:-All sources}"

    # Row layout: sum for $TODAY | sum for $YESTERDAY | sum_excluded result
    # followed by a literal '%' | source label.
    # NOTE(review): this calls 'sum_excluded', while the helper documented
    # nearby in this file is named 'count_excluded' - confirm which is right.
    printf "%5s |%10s |%8s%% | %s\n" \
        "$(sum "$TODAY" "$1")" \
        "$(sum "$YESTERDAY" "$1")" \
        "$(sum_excluded "$1")" \
        "$source"
}

# Function 'sum' is an echo wrapper that sums up the domains retrieved by
# that source for that particular day.
# Function 'sum' is an echo wrapper that returns the total sum of domains retrieved
# by that source for that particular day.
# $1: day to process
# $2: source to process
# $2: source to process (default is all sources)
sum() {
    # $1: day to process
    # $2: source to process (an empty value is passed through to csvgrep as-is)
    local -r day="$1" src="$2"

    # No mention of the day anywhere in the source log: print a dash and stop
    if ! grep -qF "$day" "$SOURCE_LOG"; then
        printf "-"
        return
    fi

    # Keep rows matching the day (column 1), the source (column 2) and 'yes'
    # in column 14, then add up the values of column 5.
    csvgrep -c 1 -m "$day" "$SOURCE_LOG" \
        | csvgrep -c 2 -m "$src" \
        | csvgrep -c 14 -m yes \
        | csvcut -c 5 \
        | awk '{total += $1} END {print total}'
}

# Function 'count_excluded' is an echo wrapper that counts the % of excluded domains
# of raw count retrieved from each source.
# $1: source to process
# Function 'count_excluded' is an echo wrapper that returns the % of excluded domains
# out of the raw count retrieved from each source.
# $1: source to process (default is all sources)
count_excluded() {
csvgrep -c 2 -m "$1" "$SOURCE_LOG" | csvgrep -c 14 -m yes > source_rows.tmp

Expand All @@ -172,8 +172,8 @@ count_excluded() {
printf "%s" "$((excluded_count*100/raw_count))"
}

# Function 'format_file' is a shell wrapper to standardize the format of a file
# $1: file to format.
# Function 'format_file' is a shell wrapper to standardize the format of a file.
# $1: file to format
format_file() {
    # Hand the file over to the 'format' function of functions/tools.sh
    local -r target="$1"
    bash functions/tools.sh format "$target"
}
Expand Down
5 changes: 2 additions & 3 deletions functions/validate_raw.sh
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ validate_raw() {

# Collate filtered wildcards to exclude from dead check
printf "%s\n" "$wildcards" >> "$WILDCARDS"
# Collate filtered redundant domains for dead check
# Collate filtered redundant domains for dead check
grep -Ff <(printf "%s" "$wildcards") redundant_domains.tmp >> "$REDUNDANT_DOMAINS"

format_file "$WILDCARDS"
Expand All @@ -138,10 +138,9 @@ validate_raw() {
printf "\n\e[1mProblematic domains (%s):\e[0m\n" "$(wc -l < filter_log.tmp)"
sort -u filter_log.tmp

# Save changes to raw file and raw light file
printf "%s\n" "$domains" > "$RAW"
format_file "$RAW"

# Remove filtered domains from light file
comm -12 "$RAW" "$RAW_LIGHT" > light.tmp && mv light.tmp "$RAW_LIGHT"

total_whitelisted_count="$((whitelisted_count + whitelisted_tld_count))"
Expand Down

0 comments on commit ef64db0

Please sign in to comment.