-
Notifications
You must be signed in to change notification settings - Fork 0
/
vndb_barchartrace.py
executable file
·106 lines (83 loc) · 3.7 KB
/
vndb_barchartrace.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import glob
import sys
import pandas as pd
# Modified from HLTB-Barchartrace.py
# Doc: https://blog.vinfall.com/posts/2023/11/hltb/#bar-chart-race
# Code: https://github.com/Vinfall/hltb/blob/370288b0831d49fd29d720b5343ce3ff98714f2a/HLTB-Barchartrace.py#L47-L103
def calculate_number(dataframe):
    """Build a bar-chart-race table of cumulative entry counts per label.

    For every (Date, Labels) pair present in *dataframe*, the count is the
    number of rows carrying that label whose date is on or before that date.
    The result is then expanded to the full grid of every seen date crossed
    with every seen label; a cell with no entry of its own inherits the
    label's previous count, or 0 if the label has not appeared yet.

    Rows whose 'Date' or 'Labels' value is missing are ignored.

    Args:
        dataframe: DataFrame with at least 'Date' and 'Labels' columns.
            It is not modified.

    Returns:
        DataFrame with columns 'Date', 'Labels' and 'Count', ordered by
        ascending date and, within a date, by each label's first appearance
        in the date-sorted input. Empty (with those columns) when no row
        has both a date and a label.
    """
    df_sorted = dataframe.sort_values(by="Date")
    # Rows without a date or a label can never be counted, so drop them early.
    valid = df_sorted.dropna(subset=["Date", "Labels"])
    if valid.empty:
        return pd.DataFrame(columns=["Date", "Labels", "Count"])

    # Entries per (label, date), accumulated over time within each label.
    # groupby sorts its keys, so the running sum walks dates in ascending
    # order; this replaces a per-row rescan of the whole frame.
    per_day = valid.groupby(["Labels", "Date"]).size()
    running = (
        per_day.groupby(level="Labels").cumsum().rename("Count").reset_index()
    )

    # Full grid of every seen date crossed with every seen label
    new_index = pd.MultiIndex.from_product(
        [valid["Date"].unique(), valid["Labels"].unique()],
        names=["Date", "Labels"],
    )
    grid = pd.DataFrame(index=new_index).reset_index()

    # Attach the known counts; grid cells without an entry become NaN
    merged_df = grid.merge(running, on=["Date", "Labels"], how="left")
    # Carry each label's last known count forward, then use 0 for dates
    # before the label's first appearance
    merged_df["Count"] = merged_df.groupby("Labels")["Count"].ffill().fillna(0)
    return merged_df
def format_barchartrace(dataframe, date_type):
    """Turn a raw export into bar-chart-race rows keyed on one date column.

    Args:
        dataframe: Raw DataFrame holding a 'Labels' column and the column
            named by *date_type*. It is left untouched.
        date_type: Name of the column to treat as the timeline; it is
            exposed to the counting step under the name 'Date'.

    Returns:
        The DataFrame produced by calculate_number() for the renamed data.
    """
    # Rename on a copy so the caller's frame keeps its original column names
    renamed = dataframe.rename(columns={date_type: "Date"})
    # Calculate the per-label count at each date
    return calculate_number(renamed)
# Collect every VNDB export CSV in the current working directory
file_list = glob.glob("vndb-list-export-*.csv")
if not file_list:
    # A string argument makes sys.exit() write the message to stderr and
    # exit with status 1, so a missing export is reported as a failure.
    sys.exit(
        "VNDB exported CSV not found.\n"
        "Please install VNDB List Export and export first.\n"
        "You can get it from https://github.com/Vinfall/UserScripts#list."
    )

# Convert every export found
for filepath in file_list:
    new_file_name = filepath.replace("vndb-list-export-", "vndb-list-barchartrace-")
    df_raw = pd.read_csv(filepath)
    # Accepted values: 'Start date', 'Finish date', 'Release date'
    # Note: 'Finish date' does not work much, which is expected
    # since other labels would not exist if you finish it already
    df_mod = format_barchartrace(df_raw, "Start date")
    # Debug preview
    print(df_mod.head())
    # Export to CSV; quoting=1 is csv.QUOTE_ALL (quote every field)
    df_mod.to_csv(new_file_name, index=False, quoting=1)

# Printed once, outside the loop, to avoid flooding the output
print("Now drop output to https://fabdevgit.github.io/barchartrace")