Skip to content

Commit

Permalink
Merge pull request #2 from qing762/update
Browse files Browse the repository at this point in the history
Update validate.py
  • Loading branch information
qing762 authored Feb 8, 2024
2 parents 32dcee3 + 5d6259c commit 0592b4e
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 46 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@

twice.json
upload.py
80 changes: 42 additions & 38 deletions lib/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ async def lintCheck():
)
except subprocess.CalledProcessError as e:
print(f"Linting failed with {str(e)}")
return
sys.exit(1)
try:
subprocess.run(
[
Expand All @@ -44,7 +44,7 @@ async def lintCheck():
)
except subprocess.CalledProcessError as e:
print(f"Linting failed with {str(e)}")
return
sys.exit(1)
print("Linting passed!\n\n")


Expand Down Expand Up @@ -79,7 +79,7 @@ async def validateLinks():
print("All links are valid!\n\n")
else:
print(f"Invalid links found:\n{invalidURL}\n\n")
return
sys.exit(1)


async def validateLang():
Expand All @@ -92,41 +92,45 @@ async def validateLang():
with open(file, "r", encoding="utf-8") as file:
data = json.load(file)

for member in data:
for key, value in data[member]["otherName"].items():
detected = translator.detect(value)
detected_lang = detected.lang
if isinstance(detected_lang, list):
detected_lang = detected_lang[0]
if detected_lang == "zh-CN":
detected_lang = "zh"
if isinstance(detected_lang, str) and detected_lang != key:
if value == "名井南" and detected_lang == "ja":
continue
elif value == "Dubu (Tofu)" and detected_lang == "zh":
continue
elif value == "平井桃" and detected_lang == "ja":
continue
elif value == "凑崎纱夏" and detected_lang == "ja":
continue
else:
all_lang_valid = False
json.dump(
{
"value": value,
"key": key,
"detected_lang": detected_lang,
},
invalidLang,
)
return
print("0")

if all_lang_valid:
print("All languages are correct!\n")
else:
print(f"Incorrect languages found:\n{[x['value'] for x in invalidLang]}\n\n")
return
for x in data:
if x == "member":
for member in data:
for key, value in data[member]["otherNames"].items():
detected = translator.detect(value)
detected_lang = detected.lang
if isinstance(detected_lang, list):
detected_lang = detected_lang[0]
if detected_lang == "zh-CN":
detected_lang = "zh"
if isinstance(detected_lang, str) and detected_lang != key:
if value == "名井南" and detected_lang == "ja":
continue
elif value == "Dubu (Tofu)" and detected_lang == "zh":
continue
elif value == "平井桃" and detected_lang == "ja":
continue
elif value == "凑崎纱夏" and detected_lang == "ja":
continue
else:
all_lang_valid = False
json.dump(
{
"value": value,
"key": key,
"detected_lang": detected_lang,
},
invalidLang,
)
sys.exit(1)
print("0")

if all_lang_valid:
print("All languages are correct!\n")
else:
print(
f"Incorrect languages found:\n{[x['value'] for x in invalidLang]}\n\n"
)
sys.exit(1)


if __name__ == "__main__":
Expand Down
191 changes: 183 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
import sys
from bs4 import BeautifulSoup
from googletrans import Translator
from lxml import etree


class Main:
async def Main():
async def Member():
memberName = [
"Nayeon",
"Jeongyeon",
Expand Down Expand Up @@ -59,7 +60,7 @@ async def Main():

text = div.find("div", class_="pi-data-value pi-font").text

otherName = {}
otherNames = {}
languages = ["Chinese", "Japanese", "Korean", "English"]

for line in text.split("\n"):
Expand All @@ -75,11 +76,11 @@ async def Main():
lang, lang_name = entry.split(":")
if "Dubu (Tofu)" in lang_name:
lang_name = lang_name.partition("Dubu (Tofu)")[0]
otherName["informal"] = "Dubu (Tofu)"
otherNames["informal"] = "Dubu (Tofu)"
lang = str(langcodes.find(lang.strip()))
otherName[lang] = lang_name.strip()
otherNames[lang] = lang_name.strip()

otherName[native_lang] = native
otherNames[native_lang] = native

birthDate, age = (
soup.find(
Expand Down Expand Up @@ -447,7 +448,7 @@ async def Main():

data = {
"name": name,
"otherName": otherName,
"otherNames": otherNames,
"birthDate": birthDate,
"age": age,
"birthPlace": birthPlace,
Expand Down Expand Up @@ -482,12 +483,186 @@ async def Main():

return memberData

async def ships():
    """Scrape every pairing ("ship") page listed on the TWICE fandom wiki.

    Downloads the ``Category:Pairings`` index, collects the link of every
    listed member entry, then fetches each linked page and extracts its
    infobox fields plus three bulleted sections.

    Returns:
        dict: Maps each page's infobox title to a dict with the keys
        ``name``, ``shipped``, ``otherNames``, ``rivals``,
        ``similarities``, ``differences``, ``facts``, ``images`` and
        ``fandom`` (the page URL). Optional infobox fields that are absent
        from a page yield ``None`` (``name``, ``shipped``, ``images``) or
        an empty list instead of raising.
    """
    title_class = "pi-item pi-item-spacing pi-title pi-secondary-background"
    row_class = "pi-item pi-data pi-item-spacing pi-border-color"
    value_class = "pi-data-value pi-font"

    def infobox_title(soup):
        # The title heading is looked up under "name" first, then "title1".
        for source in ("name", "title1"):
            heading = soup.find(
                "h2", class_=title_class, attrs={"data-source": source}
            )
            if heading is not None:
                return heading.get_text()
        return None

    def infobox_value(soup, source):
        # Return the value <div> of one infobox row, or None when the row
        # (or its value container) is not present on the page.
        row = soup.find("div", class_=row_class, attrs={"data-source": source})
        if row is None:
            return None
        return row.find("div", class_=value_class)

    def infobox_list(soup, source):
        # Return the text of every <li> in an infobox row; [] when the row
        # or its <ul> is missing (the unguarded chain used to raise here).
        value = infobox_value(soup, source)
        bullets = value.find("ul") if value is not None else None
        if bullets is None:
            return []
        return [item.get_text() for item in bullets.find_all("li")]

    def bullet_section(tree, position):
        # Return the non-empty text lines of the position-th <ul> directly
        # under the article body, or [] when there is no such list.
        # NOTE(review): which list holds which section is positional and
        # assumes every pairing page shares the same layout - confirm.
        matches = tree.xpath(f'//*[@id="mw-content-text"]/div/ul[{position}]')
        if not matches:
            return []
        markup = etree.tostring(matches[0], pretty_print=True).decode()
        text = BeautifulSoup(markup, "html.parser").get_text()
        return [line for line in text.split("\n") if line]

    def infobox_image(soup):
        # Return the full-size image URL, or None when the page has no
        # image figure or the figure has no thumbnail link. The figure
        # itself used to be dereferenced without a None check.
        figure = soup.find(
            "figure", class_="pi-item pi-image", attrs={"data-source": "image"}
        )
        if figure is None:
            return None
        anchor = figure.find("a", class_="image image-thumbnail")
        if anchor is None:
            return None
        return anchor["href"] + "&format=original"

    shipsData = {}
    shipsURL = []

    async with aiohttp.ClientSession() as session:
        async with session.get(
            "https://twice.fandom.com/wiki/Category:Pairings"
        ) as response:
            listing = BeautifulSoup(await response.text(), "html.parser")

        # A missing listing container still raises: without it there is
        # nothing to scrape and silently returning {} would hide the problem.
        members = listing.find("div", class_="category-page__members")
        for wrapper in members.find_all(
            "div", class_="category-page__members-wrapper"
        ):
            group = wrapper.find("ul", class_="category-page__members-for-char")
            for entry in group.find_all("li", class_="category-page__member"):
                shipsURL.append(
                    f"https://twice.fandom.com{entry.find('a')['href']}"
                )

        for url in shipsURL:
            async with session.get(url) as response:
                # Read the body once and feed both parsers from it.
                html = await response.text()
            soup = BeautifulSoup(html, "html.parser")
            tree = etree.HTML(html)

            name = infobox_title(soup)

            shipped_value = infobox_value(soup, "shipped")
            shipped = (
                shipped_value.get_text().split(" and ")
                if shipped_value is not None
                else None
            )

            data = {
                "name": name,
                "shipped": shipped,
                "otherNames": infobox_list(soup, "other names"),
                "rivals": infobox_list(soup, "rivals"),
                "similarities": bullet_section(tree, 2),
                "differences": bullet_section(tree, 3),
                "facts": bullet_section(tree, 4),
                "images": infobox_image(soup),
                "fandom": url,
            }

            shipsData[name] = data

    return shipsData


if __name__ == "__main__":
try:
memberData = asyncio.run(Main.Main())
memberData = asyncio.run(Main.Member())
shipsData = asyncio.run(Main.ships())
combinedData = {"member": memberData, "ships": shipsData}
with open("twice.json", "w", encoding="utf-8") as f:
json.dump(memberData, f, indent=2)
json.dump(combinedData, f, indent=4)
except KeyboardInterrupt:
print("Process stopping due to keyboard interrupt")
try:
Expand Down

0 comments on commit 0592b4e

Please sign in to comment.