-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 1eb6e33
Showing
8 changed files
with
633 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2020 Kishore Kumar | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# comparesv | ||
### CSV Comparison on steroids | ||
|
||
## Usage | ||
|
||
```console | ||
comparesv [-h] [-v] [--enc1 ENCODING] [--enc2 ENCODING] [-i] | ||
[-rm ROW_MATCH] [-cm COLUMN_MATCH] [-sm STRING_MATCH] [-ir] | ||
[-ic] [-is] [-s] | ||
[FILE1] [FILE2] | ||
|
||
CSV files comparison | ||
|
||
positional arguments: | ||
FILE1 the first CSV file | ||
FILE2 the second CSV file | ||
|
||
optional arguments: | ||
-h, --help show this help message and exit | ||
-v, --version show program's version number and exit | ||
--enc1 ENCODING encoding of the first file (default is to autodetect) | ||
--enc2 ENCODING encoding of the second file (default is to autodetect) | ||
-i, --ignore-case ignore case (default is case-sensitive) | ||
-rm ROW_MATCH, --row-match ROW_MATCH | ||
Logic to be used to identify the rows. Possible | ||
options 'order', 'fuzzy', 'deep' (default is order) | ||
-cm COLUMN_MATCH, --column-match COLUMN_MATCH | ||
Logic to be used to identify the columns. Possible | ||
options 'exact','fuzzy' (default is exact) | ||
-sm STRING_MATCH, --string-match STRING_MATCH | ||
Logic to be used to identify the columns. Possible | ||
options 'exact','fuzzy' (default is exact) | ||
-ir, --include-addnl-rows | ||
Include additional rows from second file | ||
(default is false) | ||
-ic, --include-addnl-columns | ||
Include additional columns from second file | ||
(default is false) | ||
-is, --include-stats Include stats (default is false) | ||
-s, --save-output Save output to file | ||
``` | ||
|
||
## Description | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
import sys | ||
import os | ||
import io | ||
import csv | ||
import logging | ||
import warnings | ||
import argparse | ||
import chardet | ||
import tqdm | ||
import comparesv | ||
from version import __version__ | ||
|
||
from pprint import pprint | ||
|
||
# This file is derived from https://github.com/maxharlow/csvmatch/blob/master/cli.py | ||
|
||
def main():
    """Run the comparesv command-line interface.

    Parses the command line, reads both CSV inputs, passes them to
    ``comparesv.run`` and, when ``save_output`` is set, writes
    ``values.csv`` and ``results.csv`` via ``save_file``. The ``stats``
    entry of the result is pretty-printed to stdout.

    Errors (and Ctrl-C) are reported as a single message on stderr with a
    non-zero exit status instead of a traceback.
    """
    logging.captureWarnings(True)
    logging.basicConfig(level=logging.WARN, format='Warning: %(message)s')
    # Reduce every warning to its bare message text.
    warnings.formatwarning = lambda e, *args: str(e)
    sys.stderr.write('Starting up...\n')
    try:
        file1, file2, args = arguments()
        # NOTE(review): looks like leftover debug output -- confirm before removing.
        print(args)
        data1, headers1 = read(*file1)
        data2, headers2 = read(*file2)
        results = comparesv.run(data1, headers1, data2, headers2, ticker=ticker, **args)
        if args.get("save_output"):
            save_file("values.csv", results['headers'], results['values'])
            save_file("results.csv", results['headers'], results['results'])
        pprint(results['stats'])
        sys.stdout.flush()
    except (Exception, KeyboardInterrupt) as e:
        # SystemExit is intentionally NOT caught here: argparse raises it for
        # --help/--version with status 0, and wrapping it in another
        # sys.exit() would print a stray "0" and turn the status into 1.
        sys.exit(e)
|
||
|
||
def ticker(text, total):
    """Build a tqdm progress bar and return its ``update`` method.

    Args:
        text: Label placed in front of the bar.
        total: Value passed to tqdm as the bar's ``total``.

    Returns:
        The bound ``update`` method of the newly created bar.
    """
    layout = text + ' |{bar}| {percentage:3.0f}% / {remaining} left'
    bar = tqdm.tqdm(total=total, bar_format=layout)
    return bar.update
|
||
|
||
def read(filename, encoding):
    """Load a CSV file (or stdin) and split it into data rows and a header row.

    Args:
        filename: Path of the CSV file, or ``'-'`` to read standard input.
        encoding: Character encoding of the input. When falsy, the encoding
            is autodetected with chardet and reported on stderr.

    Returns:
        A ``(rows, headers)`` tuple: ``headers`` is the first CSV record and
        ``rows`` is the list of all remaining records.

    Raises:
        Exception: if the file does not exist, is empty, its encoding cannot
            be detected or decoded, or it cannot be parsed as CSV.
    """
    if filename != '-' and not os.path.isfile(filename):
        raise Exception(filename + ': no such file')
    if filename == '-':
        # Read raw bytes so stdin follows the same detect/decode path as files
        # (sys.stdin.read() would return already-decoded str).
        text = sys.stdin.buffer.read()
    else:
        with io.open(filename, 'rb') as handle:
            text = handle.read()
    # `text` is bytes, so test emptiness by truthiness rather than `== ''`.
    if not text:
        raise Exception(filename + ': file is empty')
    if not encoding:
        detector = chardet.universaldetector.UniversalDetector()
        for line in text.split(b'\n'):
            detector.feed(line)
            if detector.done:
                break
        detector.close()
        encoding = detector.result['encoding']  # can't always be relied upon
        if not encoding:
            # The detector may give up and report no encoding at all.
            raise Exception(filename + ': could not detect character encoding -- try specifying the encoding')
        sys.stderr.write(filename + ': autodetected character encoding as ' + encoding.upper() + '\n')
    try:
        text_decoded = text.decode(encoding)
        reader = csv.reader(io.StringIO(text_decoded, newline=None))
        headers = next(reader)
        return list(reader), headers
    except UnicodeDecodeError as e:
        raise Exception(filename + ': could not read file -- try specifying the encoding') from e
    except csv.Error as e:
        raise Exception(filename + ': could not read file as a CSV') from e
|
||
|
||
def arguments():
    """Parse the command line.

    If neither FILE1 nor FILE2 is given (both default to ``'-'``), the help
    text is printed to stderr and the process exits with status 1.

    Returns:
        A tuple ``((file1, enc1), (file2, enc2), args)`` where ``args`` is a
        dict of the remaining options (``ignore_case``, ``row_match``,
        ``column_match``, ``string_match``, ``include_addnl_rows``,
        ``include_addnl_columns``, ``include_stats``, ``save_output``).
    """
    parser = argparse.ArgumentParser(description='CSV files comparison')
    parser.add_argument('-v', '--version', action='version', version=__version__)
    parser.add_argument('FILE1', nargs='?', default='-', help='the first CSV file')
    parser.add_argument('FILE2', nargs='?', default='-', help='the second CSV file')
    parser.add_argument('--enc1', type=str, metavar='ENCODING', help='encoding of the first file (default is to autodetect)')
    parser.add_argument('--enc2', type=str, metavar='ENCODING', help='encoding of the second file (default is to autodetect)')
    parser.add_argument('-i', '--ignore-case', action='store_true', help='ignore case (default is case-sensitive)')
    parser.add_argument('-rm', '--row-match', default='order', help='Logic to be used to identify the rows. Possible options \'order\', \'fuzzy\', \'deep\' (default is order)')
    parser.add_argument('-cm', '--column-match', default='exact', help='Logic to be used to identify the columns. Possible options \'exact\',\'fuzzy\' (default is exact)')
    parser.add_argument('-sm', '--string-match', default='exact', help='Logic to be used to compare the string values. Possible options \'exact\',\'fuzzy\' (default is exact)')
    parser.add_argument('-ir', '--include-addnl-rows', action='store_true', help='Include additional rows from second file (default is false)')
    parser.add_argument('-ic', '--include-addnl-columns', action='store_true', help='Include additional columns from second file (default is false)')
    # These two flags default to True, so on their own they can never change
    # anything; the paired --no-* switches write to the same dest so the
    # behaviour can actually be turned off while keeping the old defaults.
    parser.add_argument('-is', '--include-stats', default=True, action='store_true', help='Include stats (default is true)')
    parser.add_argument('--no-include-stats', dest='include_stats', action='store_false', help='Do not include stats')
    parser.add_argument('-s', '--save-output', default=True, action='store_true', help='Save output to file. This saves the output in the current directory (default is true)')
    parser.add_argument('--no-save-output', dest='save_output', action='store_false', help='Do not save output to file')

    args = vars(parser.parse_args())
    if args['FILE1'] == '-' and args['FILE2'] == '-':
        parser.print_help(sys.stderr)
        parser.exit(1)
    # Strip the input-related entries so that `args` only carries the
    # comparison options.
    file1 = args.pop('FILE1')
    file2 = args.pop('FILE2')
    enc1 = args.pop('enc1')
    enc2 = args.pop('enc2')
    return (file1, enc1), (file2, enc2), args
|
||
def save_file(file_name, keys, results):
    """Write rows to a CSV file in the current working directory.

    A leading ``S.No`` column holding a 1-based row counter is added to the
    header and to every row.

    Args:
        file_name: Name of the file to create inside the current directory.
        keys: Sequence of column headers.
        results: Iterable of row sequences, one per output line.
    """
    updated_keys = ['S.No'] + list(keys)
    # list(...) lets rows be tuples as well as lists.
    updated_results = [[idx] + list(result) for idx, result in enumerate(results, start=1)]

    target = os.path.join(os.getcwd(), file_name)
    # newline='' hands line-ending control to the csv writer, as the csv
    # module documentation requires for files passed to csv.writer.
    with open(target, 'w', newline='') as file:
        writer = csv.writer(file, lineterminator='\n')  # can't use dictwriter as headers are printed even when there's no results
        writer.writerow(updated_keys)
        writer.writerows(updated_results)
|
||
def format(results, keys):
    """Render a header row plus data rows as CSV text.

    Args:
        results: Iterable of rows to emit after the header.
        keys: Header row.

    Returns:
        The CSV text with its last character (the final line terminator)
        removed.
    """
    buffer = io.StringIO()
    # A plain writer is used rather than DictWriter; see the original note
    # about headers being printed even when there are no results.
    out = csv.writer(buffer, lineterminator='\n')
    for row in [keys, *results]:
        out.writerow(row)
    rendered = buffer.getvalue()
    return rendered[:-1]
|
||
|
||
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
Oops, something went wrong.