#!/usr/bin/env uv run python3

from google.cloud import bigquery
import pandas as pd
import os

print('Starting...')

# Point the Google client library at a service-account key file.
# setdefault() keeps any GOOGLE_APPLICATION_CREDENTIALS value that is already
# set in the environment, so the hard-coded path below is only a fallback
# instead of silently overriding the caller's own credentials.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    '/Users/reuven/Downloads/bw-151-0ffffc35f2a0.json',
)

# Create the client with an explicit project; the table being queried lives
# in a different project (bigquery-public-data).
client = bigquery.Client(project='bw-151')

# One output row per (calendar month, package): the number of rows in the
# public PyPI file_downloads table for each of the four listed packages,
# restricted to timestamps whose date falls within 2025.
query = """
SELECT
    DATE_TRUNC(DATE(timestamp), MONTH) as month,
    file.project as package,
    COUNT(*) as downloads
FROM
    `bigquery-public-data.pypi.file_downloads`
WHERE
    DATE(timestamp) BETWEEN '2025-01-01' AND '2025-12-31'
    AND file.project IN ('pandas', 'polars', 'jupyter', 'marimo')
GROUP BY
    month, package
ORDER BY
    month, package
"""

# Run the query and collect the result as a pandas DataFrame.
df = client.query(query).to_dataframe()

# The count reported here is the number of aggregated result rows, not the
# number of individual downloads.
print(f'Downloaded {len(df.index)} records. Saving to parquet...')
df.to_parquet('bw-151-comparison.parquet')
print('Done.')
