#!/usr/bin/env uv run python3

from google.cloud import bigquery
import pandas as pd
import os

print('Starting...')

# Point the Google client libraries at a service-account key file.
# setdefault() leaves any GOOGLE_APPLICATION_CREDENTIALS value that is already
# present in the environment untouched, so the script is not tied to a single
# machine; the hard-coded path below is only used as a fallback.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    '/Users/reuven/Downloads/bw-151-0ffffc35f2a0.json',
)

# Create the BigQuery client with an explicit project
client = bigquery.Client(project='bw-151')

# SQL for the top 100 packages per calendar month of 2025, by download count.
#
# 1. daily_package_counts: despite its name, this CTE aggregates per MONTH --
#    it truncates each download's date to the first of its month and counts
#    rows per (month, package).
# 2. ranked_packages: numbers the packages within each month from most to
#    least downloaded.  The package name is a secondary sort key so that
#    packages with equal download counts are ranked the same way on every
#    run (ROW_NUMBER alone would order ties arbitrarily).
# 3. The final SELECT keeps ranks 1-100 and orders rows by month, then rank.
query = """
WITH daily_package_counts AS (
  SELECT
    DATE_TRUNC(DATE(timestamp), MONTH) as download_month,
    file.project as package,
    COUNT(*) as downloads
  FROM
    `bigquery-public-data.pypi.file_downloads`
  WHERE
    DATE(timestamp) BETWEEN '2025-01-01' AND '2025-12-31'
  GROUP BY
    download_month, package
),
ranked_packages AS (
  SELECT
    download_month,
    package,
    downloads,
    ROW_NUMBER() OVER (PARTITION BY download_month ORDER BY downloads DESC, package) as rank
  FROM
    daily_package_counts
)
SELECT
  download_month,
  package,
  downloads,
  rank
FROM
  ranked_packages
WHERE
  rank <= 100
ORDER BY
  download_month, rank
"""

# Submit the query, then pull the full result set into a pandas DataFrame
query_job = client.query(query)
df = query_job.to_dataframe()

# Report how many rows came back before writing them to disk
record_count = len(df.index)
print(f'Downloaded {record_count} records. Saving to parquet...')

output_path = 'bw-151-packages-per-month.parquet'
df.to_parquet(output_path)
print('Done.')
