From 4fab20323265551617d10546a2b53dfaf86a42b7 Mon Sep 17 00:00:00 2001
From: Mohamed Akram
Date: Mon, 8 Jan 2024 15:50:25 +0400
Subject: [PATCH] Add support for multiple files

---
Reviewer notes (this area is not part of the commit message):

This patch was recovered from a copy whose line breaks had been collapsed
into spaces. Line boundaries were rebuilt from the +/- markers; the result
matches the old/new line counts of every hunk header and the per-file and
total figures of the diffstat below. Indentation and runs of spaces inside
a line could not be recovered (tabs are assumed here), so apply with
whitespace-insensitive matching, e.g. `git apply --ignore-whitespace`, and
inspect the result. The From: header carries no e-mail address in the
recovered text; restore it before feeding this to `git am`.

NOTE(review), to confirm against the full scripts:
* `for file in $files` is unquoted with IFS set to a tab, so unless
  globbing is disabled elsewhere, file names containing glob characters
  are expanded, and names containing a tab are split.
* `2>/dev/null` on the grep calls hides its diagnostics, including the
  one for an input file that does not exist.

 jawk         | 38 +++++++++++++++++++++++++++++--------
 src/jawk.awk |  8 ++++++--
 src/jawk.sh  | 30 +++++++++++++++++++++++-------
 test/run.sh  | 21 ++++++++++++++++-----
 4 files changed, 74 insertions(+), 23 deletions(-)

diff --git a/jawk b/jawk
index 611584d..45c7e84 100755
--- a/jawk
+++ b/jawk
@@ -5,7 +5,7 @@ set -eu
 
 name=`basename $0`
 usage() {
-	echo "usage: $name [-v var=value] [-f progfile | 'prog'] [file]" >&2
+	echo >&2 "usage: $name [-v var=value] [-f progfile | 'prog'] [file ...]"
 	exit 1
 }
 
@@ -22,7 +22,7 @@ done
 i=0
 skip=
 f=
-file=
+files=
 for arg do
 	i=$((i+1))
 	shift
@@ -34,19 +34,25 @@ for arg do
 		fi
 	else
 		if [ ! "$p" ]; then prog=$arg; p=1
-		elif [ ! "$f" ]; then file=$arg; f=1
-		else usage; fi
+		else
+			if [ ! "$f" ]; then files="$arg"
+			else files=$(printf '%s\t%s' "$files" "$arg"); fi
+			f=$((f+1));
+		fi
 		continue
 	fi
 	set -- "$@" "$arg"
 done
 
 if [ ! "$p" ]; then usage; fi
-if [ ! "$f" ]; then file=-; fi
 
 jawk='
-BEGIN { JSON="\1"; TYPE="\2"; __KEYS="\3"; FS="\n"; __jawk__init() }
-{ __parse_value($0); $0 = _[JSON]; NR = ++__NR; FILENAME = __FILENAME }
+BEGIN {
+	ARGC=__ARGC=split(__ARGV, ARGV, "\t")+1; ARGV[0]=__ARGV0; RS=FS="\n"
+	JSON="\1"; TYPE="\2"; __KEYS="\3"; __jawk__init()
+}
+/^---/ { FILENAME=substr($0, 4); __FNR=0; next }
+{ ARGC=__ARGC; __parse_value($0); $0=_[JSON]; NR=++__NR; FNR=++__FNR }
 
 function __jawk__init(i) {
 	__CHAR[0] = "\0"; __CHAR[1] = "\1"; __CHAR[2] = "\2"
@@ -306,9 +312,23 @@ STRING="\"$CHAR*($ESCAPE$CHAR*)*\""
 NUMBER='-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?'
 KEYWORD='null|false|true'
 SPACE='[[:space:]]+'
+JSON="$STRING|$NUMBER|$KEYWORD|[][{}:,]"
+
+prog="$jawk
+$prog
+BEGIN { ARGC=1; RS=FS=\"\n\" } { ARGC=1; RS=FS=\"\n\" }
+"
 
 : ${AWK=$(command -v gawk || echo awk)}
 : ${EGREP=$(command -v ugrep || echo 'grep -E')}
 
-$EGREP -o "$STRING|$NUMBER|$KEYWORD|[][{}:,]" "$file" |
-$AWK -v __FILENAME="$file" "$@" "$jawk$prog"
+if [ ! "$f" ]; then
+	$EGREP -o "$JSON" - 2>/dev/null | $AWK -v __ARGV0="$0" "$@" "$prog"
+else
+	IFS=$(printf '\t')
+	for file in $files; do
+		unset IFS
+		printf -- '---%s\n' "$file"
+		$EGREP -o "$JSON" "$file" 2>/dev/null
+	done | $AWK -v __ARGV0="$0" -v __ARGV="$files" "$@" "$prog"
+fi
diff --git a/src/jawk.awk b/src/jawk.awk
index 06075ee..56ca448 100644
--- a/src/jawk.awk
+++ b/src/jawk.awk
@@ -1,5 +1,9 @@
-BEGIN { JSON="\1"; TYPE="\2"; __KEYS="\3"; FS="\n"; __jawk__init() }
-{ __parse_value($0); $0 = _[JSON]; NR = ++__NR; FILENAME = __FILENAME }
+BEGIN {
+	ARGC=__ARGC=split(__ARGV, ARGV, "\t")+1; ARGV[0]=__ARGV0; RS=FS="\n"
+	JSON="\1"; TYPE="\2"; __KEYS="\3"; __jawk__init()
+}
+/^---/ { FILENAME=substr($0, 4); __FNR=0; next }
+{ ARGC=__ARGC; __parse_value($0); $0=_[JSON]; NR=++__NR; FNR=++__FNR }
 
 function __jawk__init(i) {
 	__CHAR[0] = "\0"; __CHAR[1] = "\1"; __CHAR[2] = "\2"
diff --git a/src/jawk.sh b/src/jawk.sh
index 4eaf5bb..f799393 100755
--- a/src/jawk.sh
+++ b/src/jawk.sh
@@ -5,7 +5,7 @@ set -eu
 
 name=`basename $0`
 usage() {
-	echo "usage: $name [-v var=value] [-f progfile | 'prog'] [file]" >&2
+	echo >&2 "usage: $name [-v var=value] [-f progfile | 'prog'] [file ...]"
 	exit 1
 }
 
@@ -22,7 +22,7 @@ done
 i=0
 skip=
 f=
-file=
+files=
 for arg do
 	i=$((i+1))
 	shift
@@ -37,15 +37,17 @@ for arg do
 	# Get prog and file
 	else
 		if [ ! "$p" ]; then prog=$arg; p=1
-		elif [ ! "$f" ]; then file=$arg; f=1
-		else usage; fi
+		else
+			if [ ! "$f" ]; then files="$arg"
+			else files=$(printf '%s\t%s' "$files" "$arg"); fi
+			f=$((f+1));
+		fi
 		continue
 	fi
 	set -- "$@" "$arg"
 done
 
 if [ ! "$p" ]; then usage; fi
-if [ ! "$f" ]; then file=-; fi
 
 jawk=$(cat jawk.awk)
 
@@ -55,9 +57,23 @@ STRING="\"$CHAR*($ESCAPE$CHAR*)*\""
 NUMBER='-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?'
 KEYWORD='null|false|true'
 SPACE='[[:space:]]+'
+JSON="$STRING|$NUMBER|$KEYWORD|[][{}:,]"
+
+prog="$jawk
+$prog
+BEGIN { ARGC=1; RS=FS=\"\n\" } { ARGC=1; RS=FS=\"\n\" }
+"
 
 : ${AWK=$(command -v gawk || echo awk)}
 : ${EGREP=$(command -v ugrep || echo 'grep -E')}
 
-$EGREP -o "$STRING|$NUMBER|$KEYWORD|[][{}:,]" "$file" |
-$AWK -v __FILENAME="$file" "$@" "$jawk$prog"
+if [ ! "$f" ]; then
+	$EGREP -o "$JSON" - 2>/dev/null | $AWK -v __ARGV0="$0" "$@" "$prog"
+else
+	IFS=$(printf '\t')
+	for file in $files; do
+		unset IFS
+		printf -- '---%s\n' "$file"
+		$EGREP -o "$JSON" "$file" 2>/dev/null
+	done | $AWK -v __ARGV0="$0" -v __ARGV="$files" "$@" "$prog"
+fi
diff --git a/test/run.sh b/test/run.sh
index 7e28ac2..40d124b 100755
--- a/test/run.sh
+++ b/test/run.sh
@@ -167,11 +167,22 @@ test "trailing space in JSON"
 out=$(printf '{"a":1, \n"b":2}' | jawk '{print _["b"]}')
 [ "$out" = '2' ]
 
-test "file"
-echo '{"age":10}' >test/test.json
-out=$(jawk '{print FILENAME;print _["age"]}' test/test.json; rm test/test.json)
-[ "$out" = "$(printf 'test/test.json\n10')" ]
-
 test "NR"
 out=$(printf '{\n"age":10\n}\n{\n"age":12\n}' | jawk '{print NR}')
 [ "$out" = "$(printf '1\n2')" ]
+
+test "files"
+printf '{"age":10}\n{"age":20}' >test/test.json
+printf '{"age":30}\n{"age":40}' >test/test2.json
+out=$(jawk \
+	'BEGIN{print ARGC,ARGV[0],ARGV[1],ARGV[2]}
+	 {print ARGC,FILENAME,NR,FNR,_["age"]}' \
+	test/test.json test/test2.json
+	rm test/test*.json
+)
+[ "$out" = "$(printf '%s\n' \
+'3 ./jawk test/test.json test/test2.json' \
+'3 test/test.json 1 1 10' \
+'3 test/test.json 2 2 20' \
+'3 test/test2.json 3 1 30' \
+'3 test/test2.json 4 2 40')" ]