Skip to content

Commit

Permalink
Add function to read PDF data
Browse files Browse the repository at this point in the history
  • Loading branch information
ruchernchong committed May 25, 2024
1 parent 2a4ed69 commit 9e311cc
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 21 deletions.
47 changes: 26 additions & 21 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,48 @@
import matplotlib.pyplot as plt
import numpy as np

import read_pdf
from utils.calculate_growth_rate import calculate_growth_rate
from utils.replace_text import replace_text

# Data
quarters = ["Q1 FY24", "Q2 FY24", "Q3 FY24", "Q4 FY24", "Q1 FY25"]
data_center = [4284, 10323, 14514, 18404, 22563]
gaming = [2240, 2486, 2856, 2865, 2647]
professional_visualization = [295, 379, 416, 463, 427]
auto = [296, 253, 261, 281, 329]
oem_other = [77, 66, 73, 90, 78]
total = [7192, 13507, 18120, 22103, 26044]
# Step 1: Extract data from the PDF
data = read_pdf.extract_data_from_pdf('Rev_by_Mkt_Qtrly_Trend_Q125.pdf')

# Calculate growth rates as percentages with + or -
growth_rates = [calculate_growth_rate(total[i], total[i - 1]) for i in range(len(total))]
# Step 2: Assign data to variables
quarters = data['quarters']
data_center = data['data_center']
gaming = data['gaming']
professional_visualization = data['professional_visualization']
auto = data['auto']
oem_other = data['oem_other']
total = data['total']

# Print growth rates
# Step 3: Calculate growth rates as percentages with + or -
growth_rates = [calculate_growth_rate(total[i], total[i - 1]) if i != 0 else 0 for i in range(len(total))]

# Step 4: Print growth rates
for quarter, rate in zip(quarters[1:], growth_rates[1:]):
print(f"{quarter}: {rate}")
print(f"{quarter}: {rate}%")

# Plotting
# Step 5: Plotting
x = np.arange(len(quarters)) # the label locations
width = 0.1 # the width of the bars
width = 0.15 # the width of the bars
bar_positions = [x - 2 * width, x - width, x, x + width, x + 2 * width, x + 3 * width]
bar_labels = ['Data Center', 'Gaming', 'Professional Visualization', 'Auto', 'OEM & Other', 'Total']
bar_labels = ['data_center', 'gaming', 'professional_visualization', 'auto', 'oem_other', 'total']
bar_data = [data_center, gaming, professional_visualization, auto, oem_other, total]

fig, ax = plt.subplots(figsize=(14, 8))

rects = []
for pos, label in zip(bar_positions, bar_labels):
rect = ax.bar(pos, eval(replace_text(label)), width, label=label)
for pos, label, data in zip(bar_positions, bar_labels, bar_data):
rect = ax.bar(pos, data, width, label=replace_text(label))
rects.append(rect)

# Add growth rate annotations
# Step 6: Add growth rate annotations
for i, rate in enumerate(growth_rates):
ax.annotate(f'{rate}', (x[i], total[i]), textcoords="offset points", xytext=(0, 0), ha='center')
ax.annotate(f'{rate}%', (x[i], total[i]), textcoords="offset points", xytext=(0, 5), ha='center')

# Add some text for labels, title and custom x-axis tick labels, etc.
# Step 7: Add some text for labels, title, and custom x-axis tick labels, etc.
ax.set_xlabel('Quarter')
ax.set_ylabel('Revenue ($ in millions)')
ax.set_title('NVIDIA Quarterly Revenue Trend by Market')
Expand All @@ -48,7 +53,7 @@
# Rotate the tick labels for better readability
plt.xticks(rotation=45)

# Step 8: Adjust layout and save the figure
fig.tight_layout()

plt.savefig('nvidia-revenue-trend.png')
plt.show()
Binary file modified nvidia-revenue-trend.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
36 changes: 36 additions & 0 deletions read_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import pdfplumber

from utils.replace_text import replace_text


def extract_data_from_pdf(pdf_path):
    """Extract the revenue table from the first page of a PDF.

    The first table row is treated as the header: its first cell is skipped
    and the remaining cells are stored under ``data['quarters']``. Each
    following row adds one entry whose key is ``replace_text`` applied to the
    lower-cased label in the row's first cell and whose value is the list of
    remaining cells parsed as integers (``$`` and ``,`` removed). Quarters
    and values are both stored in reverse of their order in the table.

    Args:
        pdf_path: Path of the PDF file, passed to ``pdfplumber.open``.

    Returns:
        dict: The extracted data. Failures are reported with ``print``
        instead of being raised, so the result may be empty or contain only
        the entries stored before the failure; callers should check for the
        keys they need.
    """
    data = {}

    try:
        with pdfplumber.open(pdf_path) as pdf:
            # Assuming the relevant data is on the first page
            table = pdf.pages[0].extract_table()

        if not table:
            raise ValueError("No table found on the first page.")

        # Extract quarters from the first row, skipping the first column header
        header, *rows = table
        data['quarters'] = header[1:][::-1]

        # Process the rest of the rows (everything after the header row)
        for row in rows:
            # Skip empty rows and rows without a label: a missing or blank
            # first cell would otherwise raise on `.lower()` and abort the
            # extraction of every remaining row.
            if not row or not row[0] or not row[0].strip():
                continue

            key = replace_text(row[0].lower())
            # Ignore missing and whitespace-only cells; int() cannot parse
            # them and one blank cell should not discard the whole table.
            values = [
                int(cell.replace('$', '').replace(',', ''))
                for cell in row[1:]
                if cell and cell.strip()
            ]
            data[key] = values[::-1]

    except FileNotFoundError:
        print(f"Error: The file '{pdf_path}' was not found.")
    except ValueError as ve:
        print(f"Error processing the PDF: {ve}")
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and return
        # whatever was collected so far.
        print(f"An unexpected error occurred: {e}")

    return data
7 changes: 7 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
cffi==1.16.0
charset-normalizer==3.3.2
contourpy==1.2.1
cryptography==42.0.7
cycler==0.12.1
fonttools==4.52.1
iniconfig==2.0.0
kiwisolver==1.4.5
matplotlib==3.9.0
numpy==1.26.4
packaging==24.0
pdfminer.six==20231228
pdfplumber==0.11.0
pillow==10.3.0
pluggy==1.5.0
pycparser==2.22
pyparsing==3.1.2
pypdfium2==4.30.0
pytest==8.2.1
python-dateutil==2.9.0.post0
six==1.16.0

0 comments on commit 9e311cc

Please sign in to comment.