Skip to content

Commit

Permalink
refactor linter.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
yzqzss committed Jul 7, 2024
1 parent 7723326 commit 5994ed8
Showing 1 changed file with 58 additions and 20 deletions.
78 changes: 58 additions & 20 deletions linter.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,64 @@
with open('blogs-original.csv', 'r') as f:
file_content = f.read()
import csv

lines = file_content.split('\n')

content = lines[0] + '\n'
csv_file_path = 'blogs-original.csv'

for line in lines[1:]:
line = line.strip()
if not line:
continue
def check_tags(tags: str) -> None:
    """Validate the formatting of a `tags` cell.

    Tags are expected to be separated by exactly ``'; '`` (semicolon followed
    by one space), with no surrounding whitespace, no empty entries and no
    duplicates.

    Args:
        tags: The raw, non-empty content of the `tags` column.

    Raises:
        AssertionError: If any formatting rule is violated. The message names
            the rule. The exception is raised explicitly (not via ``assert``)
            so the checks still run under ``python -O``.
    """
    if tags != tags.strip():
        raise AssertionError("leading/trailing whitespace")

    # Comma-level checks: a comma inside the cell must not be surrounded by
    # spaces or produce an empty piece.
    comma_parts = tags.split(',')
    if any(part.startswith(' ') or part.endswith(' ') for part in comma_parts):
        raise AssertionError("leading/trailing whitespace in tags")
    if '' in comma_parts:
        raise AssertionError("empty tag")

    tags_list = tags.split('; ')
    # Any `;` left over after splitting on '; ' was not followed by a space.
    if any(';' in tag for tag in tags_list):
        raise AssertionError("no trailing space after `;`")
    if any(tag != tag.strip() for tag in tags_list):
        raise AssertionError("leading/trailing whitespace in tags")
    # An empty entry (e.g. "; a" or "a; ; b") is invisible to the comma-level
    # check above, so it has to be tested on the real separator as well.
    if '' in tags_list:
        raise AssertionError("empty tag")
    if len(tags_list) != len(set(tags_list)):
        raise AssertionError("duplicate tag(s)")

if parts[3]:
parts[3] = parts[3].strip().replace(';', ';')
tags = parts[3].split(';')
tags = [tag.strip() for tag in tags]
parts[3] = '; '.join([tag for tag in tags if tag])
content += ', '.join(parts) + '\n'
def check_csv(csv_file_path):
    """Lint every data row of the blog list CSV.

    The file must have exactly the columns ``Introduction``, ``Address``,
    ``RSS feed`` and ``tags``, in that order. Each row is printed before it
    is checked (presumably so a failure can be located in the output, since
    the error messages carry no row context).

    Args:
        csv_file_path: Path of the UTF-8 encoded CSV file to check.

    Raises:
        AssertionError: If a row violates a rule. The message names the rule.
            Raised explicitly (not via ``assert``) so the checks still run
            under ``python -O``.
        csv.Error: If the file is malformed CSV (the reader is strict).
    """
    expected_columns = ['Introduction', 'Address', 'RSS feed', 'tags']
    url_schemes = ('http://', 'https://')

    # newline='' lets the csv module do its own newline handling, as the csv
    # documentation requires for file objects passed to a reader.
    with open(csv_file_path, mode='r', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file, skipinitialspace=True, strict=True)
        for row in reader:
            print(row)

            # DictReader fills missing trailing fields with None ...
            if None in row.values():
                raise AssertionError("None value")
            # ... and collects surplus fields under the key None. This must
            # be tested before the column comparison, which would otherwise
            # always fail first and hide this more specific message.
            if None in row:
                raise AssertionError("incorrect number of , characters")
            if list(row.keys()) != expected_columns:
                raise AssertionError("incorrect column names/order")
            if any('|' in value for value in row.values()):
                raise AssertionError("contains `|` character(s)")

            introduction = row['Introduction']
            address = row['Address']
            rss_feed = row['RSS feed']
            tags = row['tags']

            if not (introduction and address):
                raise AssertionError("empty value")
            if introduction != introduction.strip():
                raise AssertionError("leading/trailing whitespace")
            if address != address.strip():
                raise AssertionError("leading/trailing whitespace")
            if introduction == address:
                raise AssertionError("Introduction and Address are the same")

            # Substring test on each URL; a list-membership test would only
            # catch a value that is exactly "#".
            if any('#' in url for url in (address, rss_feed)):
                raise AssertionError("Address or RSS_feed contains `#` character(s)")

            # A missing trailing `/` on Address is deliberately tolerated.
            if not address.startswith(url_schemes):
                raise AssertionError("Address does not start with `http(s)://`")

            # The RSS feed is optional; validate it only when present.
            if rss_feed:
                if not rss_feed.startswith(url_schemes):
                    raise AssertionError("RSS feed does not start with `http(s)://`")
                if rss_feed == address:
                    raise AssertionError("RSS feed and Address are the same")

            # Tags are optional as well.
            if tags:
                check_tags(tags)

def check_nnl(csv_file_path):
    """Verify that every line of the file ends with a newline.

    When iterating a text file only the final line can lack its terminator,
    so in practice this detects a missing newline at end of file.

    Args:
        csv_file_path: Path of the UTF-8 encoded file to check.

    Raises:
        AssertionError: If a line does not end with ``'\\n'``. The message
            contains the 1-based line number. Raised explicitly (not via
            ``assert``) so the check still runs under ``python -O``.
    """
    with open(csv_file_path, mode='r', encoding='utf-8') as file:
        for i, line in enumerate(file, start=1):
            if not line.endswith('\n'):
                raise AssertionError(f"Line {i} does not end with a newline")

def main(csv_file_path):
    """Run all linter passes on the given CSV file.

    The row-level checks run first, followed by the end-of-line check; the
    first failing check aborts the run by raising.

    Args:
        csv_file_path: Path of the CSV file to lint.
    """
    for run_check in (check_csv, check_nnl):
        run_check(csv_file_path)


# Lint the default CSV (the module-level `csv_file_path`) only when this file
# is executed as a script, not when it is imported.
if __name__ == "__main__":
    main(csv_file_path)

0 comments on commit 5994ed8

Please sign in to comment.