diff --git a/main.py b/main.py
index a515139..6f43842 100644
--- a/main.py
+++ b/main.py
@@ -1,43 +1,48 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
+import read_pdf
 from utils.calculate_growth_rate import calculate_growth_rate
 from utils.replace_text import replace_text
 
-# Data
-quarters = ["Q1 FY24", "Q2 FY24", "Q3 FY24", "Q4 FY24", "Q1 FY25"]
-data_center = [4284, 10323, 14514, 18404, 22563]
-gaming = [2240, 2486, 2856, 2865, 2647]
-professional_visualization = [295, 379, 416, 463, 427]
-auto = [296, 253, 261, 281, 329]
-oem_other = [77, 66, 73, 90, 78]
-total = [7192, 13507, 18120, 22103, 26044]
+# Step 1: Extract data from the PDF
+data = read_pdf.extract_data_from_pdf('Rev_by_Mkt_Qtrly_Trend_Q125.pdf')
 
-# Calculate growth rates as percentages with + or -
-growth_rates = [calculate_growth_rate(total[i], total[i - 1]) for i in range(len(total))]
+# Step 2: Assign data to variables
+quarters = data['quarters']
+data_center = data['data_center']
+gaming = data['gaming']
+professional_visualization = data['professional_visualization']
+auto = data['auto']
+oem_other = data['oem_other']
+total = data['total']
 
-# Print growth rates
+# Step 3: Calculate growth rates as percentages with + or -
+growth_rates = [calculate_growth_rate(total[i], total[i - 1]) if i != 0 else 0 for i in range(len(total))]
+
+# Step 4: Print growth rates
 for quarter, rate in zip(quarters[1:], growth_rates[1:]):
-    print(f"{quarter}: {rate}")
+    print(f"{quarter}: {rate}%")
 
-# Plotting
+# Step 5: Plotting
 x = np.arange(len(quarters))  # the label locations
-width = 0.1  # the width of the bars
+width = 0.15  # the width of the bars
 bar_positions = [x - 2 * width, x - width, x, x + width, x + 2 * width, x + 3 * width]
-bar_labels = ['Data Center', 'Gaming', 'Professional Visualization', 'Auto', 'OEM & Other', 'Total']
+bar_labels = ['data_center', 'gaming', 'professional_visualization', 'auto', 'oem_other', 'total']
+bar_data = [data_center, gaming, professional_visualization, auto, oem_other, total]
 
 fig, ax = plt.subplots(figsize=(14, 8))
 rects = []
 
-for pos, label in zip(bar_positions, bar_labels):
-    rect = ax.bar(pos, eval(replace_text(label)), width, label=label)
+for pos, label, values in zip(bar_positions, bar_labels, bar_data):
+    rect = ax.bar(pos, values, width, label=replace_text(label))
     rects.append(rect)
 
-# Add growth rate annotations
+# Step 6: Add growth rate annotations
 for i, rate in enumerate(growth_rates):
-    ax.annotate(f'{rate}', (x[i], total[i]), textcoords="offset points", xytext=(0, 0), ha='center')
+    ax.annotate(f'{rate}%', (x[i], total[i]), textcoords="offset points", xytext=(0, 5), ha='center')
 
-# Add some text for labels, title and custom x-axis tick labels, etc.
+# Step 7: Add some text for labels, title, and custom x-axis tick labels, etc.
 ax.set_xlabel('Quarter')
 ax.set_ylabel('Revenue ($ in millions)')
 ax.set_title('NVIDIA Quarterly Revenue Trend by Market')
@@ -48,7 +53,7 @@
 # Rotate the tick labels for better readability
 plt.xticks(rotation=45)
 
+# Step 8: Adjust layout and save the figure
 fig.tight_layout()
-
 plt.savefig('nvidia-revenue-trend.png')
 plt.show()
diff --git a/nvidia-revenue-trend.png b/nvidia-revenue-trend.png
index be609f7..caae855 100644
Binary files a/nvidia-revenue-trend.png and b/nvidia-revenue-trend.png differ
diff --git a/read_pdf.py b/read_pdf.py
new file mode 100644
index 0000000..bb51518
--- /dev/null
+++ b/read_pdf.py
@@ -0,0 +1,49 @@
+import pdfplumber
+
+from utils.replace_text import replace_text
+
+
+def extract_data_from_pdf(pdf_path):
+    """Extract the revenue table from the first page of a PDF.
+
+    The first table row supplies the quarter labels; every following row
+    supplies one series, keyed by ``replace_text(label.lower())``. Column
+    order is reversed on the way out (presumably the PDF lists the newest
+    quarter first -- confirm against the source document).
+
+    Errors are reported on stdout rather than raised, so the returned dict
+    may be empty or partial; callers should check for the keys they need.
+    """
+    data = {}
+
+    try:
+        with pdfplumber.open(pdf_path) as pdf:
+            # Assuming the relevant data is on the first page
+            page = pdf.pages[0]
+            table = page.extract_table()
+
+            if table:
+                # Extract quarters from the first row, skipping the first column header
+                quarters = table[0][1:]
+                data['quarters'] = quarters[::-1]
+
+                # Process the rest of the rows, skipping the first row (headers)
+                for row in table[1:]:
+                    # Skip empty rows and rows without a label: extract_table()
+                    # can yield None for blank cells, and None.lower() would abort
+                    # the whole parse via the generic handler below.
+                    if row and row[0]:
+                        key = replace_text(row[0].lower())
+                        values = [int(item.replace('$', '').replace(',', '')) for item in row[1:] if item]
+                        data[key] = values[::-1]
+            else:
+                raise ValueError("No table found on the first page.")
+
+    except FileNotFoundError:
+        print(f"Error: The file '{pdf_path}' was not found.")
+    except ValueError as ve:
+        print(f"Error processing the PDF: {ve}")
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+
+    return data
diff --git a/requirements.txt b/requirements.txt
index f688939..cb4b9fb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,7 @@
+cffi==1.16.0
+charset-normalizer==3.3.2
 contourpy==1.2.1
+cryptography==42.0.7
 cycler==0.12.1
 fonttools==4.52.1
 iniconfig==2.0.0
@@ -6,9 +9,13 @@ kiwisolver==1.4.5
 matplotlib==3.9.0
 numpy==1.26.4
 packaging==24.0
+pdfminer.six==20231228
+pdfplumber==0.11.0
 pillow==10.3.0
 pluggy==1.5.0
+pycparser==2.22
 pyparsing==3.1.2
+pypdfium2==4.30.0
 pytest==8.2.1
 python-dateutil==2.9.0.post0
 six==1.16.0