-
-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
58 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,64 @@ | ||
with open('blogs-original.csv', 'r') as f: | ||
file_content = f.read() | ||
import csv | ||
|
||
lines = file_content.split('\n') | ||
|
||
content = lines[0] + '\n' | ||
csv_file_path = 'blogs-original.csv' | ||
|
||
for line in lines[1:]: | ||
line = line.strip() | ||
if not line: | ||
continue | ||
def check_tags(tags: str) -> None:
    """Validate the formatting of one row's ``tags`` cell.

    The cell is treated as a list of tags separated by ``'; '`` (semicolon
    followed by exactly one space).

    Args:
        tags: Raw, non-empty content of the ``tags`` column.

    Raises:
        AssertionError: If the cell has leading/trailing whitespace, an empty
            tag, a ``;`` that is not followed by a space, a tag with
            surrounding whitespace, or a duplicated tag.  Raised explicitly
            (not via ``assert``) so the checks still run under ``python -O``.
    """
    if tags != tags.strip():
        raise AssertionError("leading/trailing whitespace")

    # Comma-level checks: no piece around a `,` may be padded or empty.
    comma_parts = tags.split(',')
    if any(part.startswith(' ') or part.endswith(' ') for part in comma_parts):
        raise AssertionError("leading/trailing whitespace in tags")
    if '' in comma_parts:
        raise AssertionError("empty tag")

    tags_list = tags.split('; ')
    # A `;` that survives the split was not followed by a single space.
    if any(';' in tag for tag in tags_list):
        raise AssertionError("no trailing space after `;`")
    # The comma-level check above cannot see an empty tag between `; `
    # separators (e.g. "; a" or "a; ; b"), so test the real tag list too.
    if '' in tags_list:
        raise AssertionError("empty tag")
    if any(tag != tag.strip() for tag in tags_list):
        raise AssertionError("leading/trailing whitespace in tags")
    if len(tags_list) != len(set(tags_list)):
        raise AssertionError("duplicate tag(s)")
|
||
if parts[3]: | ||
parts[3] = parts[3].strip().replace(';', ';') | ||
tags = parts[3].split(';') | ||
tags = [tag.strip() for tag in tags] | ||
parts[3] = '; '.join([tag for tag in tags if tag]) | ||
content += ', '.join(parts) + '\n' | ||
def check_csv(csv_file_path):
    """Validate every data row of the blog list CSV.

    Each row is printed, then checked for: the right number of fields, the
    expected column names in order, no ``|`` characters, non-empty and
    unpadded ``Introduction``/``Address`` values, no ``#`` in either URL,
    ``http(s)://`` URL prefixes, and well-formed tags (via ``check_tags``).
    A missing trailing ``/`` on ``Address`` is deliberately not an error.

    Args:
        csv_file_path: Path of the UTF-8 encoded CSV file to check.

    Raises:
        AssertionError: On the first row that violates a rule.  Raised
            explicitly (not via ``assert``) so the checks still run under
            ``python -O``.
        csv.Error: If the file is malformed (the reader runs in strict mode).
    """
    expected_columns = ['Introduction', 'Address', 'RSS feed', 'tags']
    url_prefixes = ('http://', 'https://')

    # newline='' lets the csv module do its own line-ending handling.
    with open(csv_file_path, mode='r', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file, skipinitialspace=True, strict=True)
        for row in reader:
            print(row)

            # DictReader stores surplus fields under the key None and fills
            # missing fields with the value None.  Check the None key first:
            # otherwise the column-name comparison fails before this more
            # specific message can ever be reported.
            if None in row:
                raise AssertionError("incorrect number of , characters")
            if None in row.values():
                raise AssertionError("None value")
            if list(row.keys()) != expected_columns:
                raise AssertionError("incorrect column names/order")
            if any('|' in value for value in row.values()):
                raise AssertionError("contains `|` character(s)")

            Introduction = row['Introduction']
            Address = row['Address']
            RSS_feed = row['RSS feed']
            tags = row['tags']

            if not (Introduction and Address):
                raise AssertionError("empty value")
            if Introduction != Introduction.strip():
                raise AssertionError("leading/trailing whitespace")
            if Address != Address.strip():
                raise AssertionError("leading/trailing whitespace")
            if Introduction == Address:
                raise AssertionError("Introduction and Address are the same")

            # Substring test on each URL; testing `'#' in [Address, RSS_feed]`
            # would only match a value that is exactly "#".
            if any('#' in url for url in (Address, RSS_feed)):
                raise AssertionError(
                    "Address or RSS_feed contains `#` character(s)")

            if not Address.startswith(url_prefixes):
                raise AssertionError(
                    "Address does not start with `http(s)://`")

            if RSS_feed:
                if not RSS_feed.startswith(url_prefixes):
                    raise AssertionError(
                        "RSS feed does not start with `http(s)://`")
                if RSS_feed == Address:
                    raise AssertionError("RSS feed and Address are the same")

            if tags:
                check_tags(tags)
|
||
def check_nnl(csv_file_path):
    """Verify that every line of the file is terminated by a newline.

    Args:
        csv_file_path: Path of the UTF-8 encoded text file to check.

    Raises:
        AssertionError: Naming the 1-based number of the first line that
            does not end with ``'\\n'``.  Raised explicitly (not via
            ``assert``) so the check still runs under ``python -O``.
    """
    with open(csv_file_path, mode='r', encoding='utf-8') as file:
        for i, line in enumerate(file, start=1):
            if not line.endswith('\n'):
                raise AssertionError(f"Line {i} does not end with a newline")
|
||
def main(csv_file_path):
    """Run the row-level checks, then the trailing-newline check, on the file."""
    for check in (check_csv, check_nnl):
        check(csv_file_path)


# Validate the default CSV only when executed as a script, not on import.
if __name__ == "__main__":
    main(csv_file_path)