Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

📊 Add data on battery cell prices #3660

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
16 changes: 16 additions & 0 deletions dag/energy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -279,3 +279,19 @@ steps:
#
export://multidim/energy/latest/energy_prices:
- data://grapher/energy/2024-11-20/energy_prices
#
# Benchmark Mineral Intelligence - Battery cell prices.
#
data-private://meadow/benchmark_mineral_intelligence/2024-11-29/battery_cell_prices:
- snapshot-private://benchmark_mineral_intelligence/2024-11-29/battery_cell_prices.xlsx
- snapshot-private://benchmark_mineral_intelligence/2024-11-29/battery_cell_prices_by_chemistry.xlsx
#
# Benchmark Mineral Intelligence - Battery cell prices.
#
data-private://garden/benchmark_mineral_intelligence/2024-11-29/battery_cell_prices:
- data-private://meadow/benchmark_mineral_intelligence/2024-11-29/battery_cell_prices
#
# Benchmark Mineral Intelligence - Battery cell prices.
#
data-private://grapher/benchmark_mineral_intelligence/2024-11-29/battery_cell_prices:
- data-private://garden/benchmark_mineral_intelligence/2024-11-29/battery_cell_prices
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
definitions:
common:
processing_level: minor
presentation:
topic_tags:
- Energy

dataset:
non_redistributable: true
update_period_days: 365

tables:
battery_cell_prices:
variables:
battery_cell_price:
title: "Annual average battery cell price"
unit: "current US$ per kilowatt-hour"
short_unit: "$/kWh"
description_short: |-
Average annual price of lithium ion battery cells per [kilowatt-hour](#dod:watt-hours). Prices are expressed in US dollars, not adjusted for inflation.
display:
numDecimalPlaces: 2
battery_cell_prices_by_chemistry:
variables:
ncm_battery_cell_price:
title: Quarterly NCM battery cell price
unit: "current US$ per kilowatt-hour"
short_unit: "$/kWh"
description_short: |-
Average quarterly price of lithium nickel manganese cobalt oxide (NCM) battery cells per [kilowatt-hour](#dod:watt-hours). Prices are expressed in US dollars, not adjusted for inflation.
display:
numDecimalPlaces: 2
lfp_battery_cell_price:
title: Quarterly LFP battery cell price
unit: "current US$ per kilowatt-hour"
short_unit: "$/kWh"
description_short: |-
Average quarterly price of lithium iron phosphate (LFP) battery cells per [kilowatt-hour](#dod:watt-hours). Prices are expressed in US dollars, not adjusted for inflation.
display:
numDecimalPlaces: 2
battery_cell_prices_combined:
variables:
price:
title: Battery cell prices
unit: "current US$ per kilowatt-hour"
short_unit: "$/kWh"
description_short: |-
Average price of battery cells per [kilowatt-hour](#dod:watt-hours). The data includes quarterly average prices of different types of lithium ion batteries, as well as an annual average. Prices are expressed in US dollars, not adjusted for inflation.
description_key:
- Lithium iron phosphate (LFP) and lithium nickel manganese cobalt oxide (NCM) are two types of rechargeable batteries commonly used in electric vehicles and renewable energy storage.
display:
numDecimalPlaces: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Load a meadow dataset and create a garden dataset."""

import owid.catalog.processing as pr
from owid.datautils.dataframes import map_series

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
# The `run` function below uses this object to load the step's input dataset by short name.
paths = PathFinder(__file__)


# Columns to keep from the annual table (keys), and the names they are given in this step (values).
COLUMNS_ANNUAL = {
    "year": "year",
    "global_avg__cell_price__dollar_kwh": "battery_cell_price",
}

# Columns to keep from the quarterly by-chemistry table (keys), and the names they are given in this step (values).
COLUMNS_QUARTERLY = {
    "date": "date",
    "ncm_weighted_average_cell_price": "ncm_battery_cell_price",
    "lfp_weighted_average_cell_price": "lfp_battery_cell_price",
}

# Label shown for each price column once the tables are combined and melted by battery chemistry.
CHEMISTRY_MAPPING = {
    "battery_cell_price": "Average",
    "ncm_battery_cell_price": "NCM",
    "lfp_battery_cell_price": "LFP",
}


# Month-day suffix assigned to each quarter (a date around the middle of that quarter).
_QUARTER_TO_MONTH_DAY = {"Q1": "-02-15", "Q2": "-05-15", "Q3": "-08-15", "Q4": "-11-15"}


def _quarter_label_to_date(label: str) -> str:
    """Convert a quarter label into a "YYYY-MM-DD" date string.

    The label must start with the quarter ("Q1" to "Q4") and end with a four-digit year; surrounding whitespace
    is ignored. The returned date is a fixed day around the middle of the quarter.

    Raises
    ------
    ValueError
        If the label is not a string, or it does not have the expected shape. Failing here avoids silently
        writing malformed dates into the dataset.
    """
    if not isinstance(label, str):
        raise ValueError(f"Unexpected quarter label (not a string): {label!r}")

    # Labels may carry stray whitespace (the annual labels need stripping too), which would shift the slices below.
    cleaned = label.strip()
    quarter, year = cleaned[:2], cleaned[-4:]
    if quarter not in _QUARTER_TO_MONTH_DAY or len(year) != 4 or not year.isdigit():
        raise ValueError(f"Unexpected quarter label (expected a quarter 'Q1'-'Q4' followed by a year): {label!r}")

    return year + _QUARTER_TO_MONTH_DAY[quarter]


def run(dest_dir: str) -> None:
    """Load the meadow dataset on battery cell prices and create the garden dataset.

    Three tables are saved: the annual prices, the quarterly prices by chemistry, and a combined long-format
    table (indexed by chemistry and date) that contains both.

    Parameters
    ----------
    dest_dir : str
        Destination passed on to `create_dataset` for the new garden dataset.

    Raises
    ------
    ValueError
        If a quarterly date label does not have the expected shape (see `_quarter_label_to_date`).
    """
    #
    # Load inputs.
    #
    # Load meadow dataset.
    ds_meadow = paths.load_dataset("battery_cell_prices")

    # Read table on annual data of historical battery prices (since 2014).
    tb_annual = ds_meadow.read("battery_cell_prices")

    # Read table on quarterly data of battery prices by chemistry.
    tb_quarterly = ds_meadow.read("battery_cell_prices_by_chemistry")

    #
    # Process data.
    #
    # Process annual data on historical battery prices.

    # Select and rename columns.
    tb_annual = tb_annual[COLUMNS_ANNUAL.keys()].rename(columns=COLUMNS_ANNUAL, errors="raise")

    # Clean year column (keep only the first four characters of each stripped label).
    tb_annual["year"] = tb_annual["year"].str.strip().str[0:4].astype("Int64")

    # Add country column.
    tb_annual["country"] = "World"

    # Process quarterly data on battery prices by chemistry.

    # Select and rename columns.
    tb_quarterly = tb_quarterly[COLUMNS_QUARTERLY.keys()].rename(columns=COLUMNS_QUARTERLY, errors="raise")

    # Clean date column, failing loudly on any label that does not look like a quarter followed by a year.
    tb_quarterly["date"] = [_quarter_label_to_date(label) for label in tb_quarterly["date"]]

    # Add country column.
    tb_quarterly["country"] = "World"

    # Create a combined table.

    # For annual data, assume the date is July 1st of each year.
    _tb_annual = tb_annual.copy()
    _tb_annual["date"] = _tb_annual["year"].astype(str) + "-07-01"

    # Combine tables.
    tb_combined = pr.concat([_tb_annual.drop(columns=["year"]), tb_quarterly])

    # Remove country column, and use the battery chemistry as "country" instead.
    tb_combined = tb_combined.drop(columns=["country"]).melt(id_vars=["date"], var_name="chemistry", value_name="price")

    # Rename battery chemistries.
    tb_combined["chemistry"] = map_series(
        tb_combined["chemistry"], CHEMISTRY_MAPPING, warn_on_missing_mappings=True, warn_on_unused_mappings=True
    )

    # Remove empty rows (each date only has prices for some of the chemistries after melting).
    tb_combined = tb_combined.dropna().reset_index(drop=True)

    # Improve table formats.
    tb_annual = tb_annual.format(["country", "year"])
    tb_quarterly = tb_quarterly.format(["country", "date"], short_name="battery_cell_prices_by_chemistry")
    tb_combined = tb_combined.format(["chemistry", "date"], short_name="battery_cell_prices_combined")

    #
    # Save outputs.
    #
    # Create a new garden dataset.
    ds_garden = create_dataset(
        dest_dir,
        tables=[tb_annual, tb_quarterly, tb_combined],
        check_variables_metadata=True,
        default_metadata=ds_meadow.metadata,
    )
    ds_garden.save()
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Load a garden dataset and create a grapher dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
# The `run` function below uses this object to load the garden dataset by short name.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Load the garden dataset on battery cell prices and create the grapher dataset.

    Only the combined table is used; its "chemistry" column is renamed to "country".
    """
    #
    # Load inputs.
    #
    # Fetch the garden dataset and, from it, the combined table.
    garden_dataset = paths.load_dataset("battery_cell_prices")
    combined = garden_dataset.read("battery_cell_prices_combined")

    #
    # Process data.
    #
    # Use the battery chemistry as "country" (to adapt to grapher requirements), then set the table format.
    combined = combined.rename(columns={"chemistry": "country"}, errors="raise")
    combined = combined.format(["country", "date"])

    #
    # Save outputs.
    #
    # Build the grapher dataset, reusing the metadata of the garden dataset, and store it.
    grapher_dataset = create_dataset(
        dest_dir,
        tables=[combined],
        check_variables_metadata=True,
        default_metadata=garden_dataset.metadata,
    )
    grapher_dataset.save()
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Load a snapshot and create a meadow dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
# The `run` function below uses this object to load the step's snapshots by file name.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Load the two battery cell price snapshots and create the meadow dataset.

    Each spreadsheet becomes one table; the dataset takes its default metadata from the first snapshot.
    """
    #
    # Load inputs.
    #
    # Retrieve both snapshots.
    snapshot_prices = paths.load_snapshot("battery_cell_prices.xlsx")
    snapshot_by_chemistry = paths.load_snapshot("battery_cell_prices_by_chemistry.xlsx")

    # Read each spreadsheet, skipping its leading rows (presumably notes above the actual table - TODO confirm).
    table_prices = snapshot_prices.read(skiprows=8)
    table_by_chemistry = snapshot_by_chemistry.read(skiprows=7)

    #
    # Process data.
    #
    # Drop columns that contain no data at all.
    table_prices = table_prices.dropna(axis=1, how="all")
    table_by_chemistry = table_by_chemistry.dropna(axis=1, how="all")

    # Set the table format, with "year" and "date" as the respective index columns.
    table_prices = table_prices.format(["year"])
    table_by_chemistry = table_by_chemistry.format(["date"])

    #
    # Save outputs.
    #
    # Build the meadow dataset with both tables and store it.
    meadow_dataset = create_dataset(
        dest_dir,
        tables=[table_prices, table_by_chemistry],
        check_variables_metadata=True,
        default_metadata=snapshot_prices.metadata,
    )
    meadow_dataset.save()
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Script to create a snapshot of dataset."""

from pathlib import Path

import click

from etl.snapshot import Snapshot

# Version for current snapshot dataset.
# Taken from the name of the folder that contains this script.
SNAPSHOT_VERSION = Path(__file__).parent.name


@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
@click.option("--path-to-file", "-f", prompt=True, type=str, help="Path to local data file.")
def main(path_to_file: str, upload: bool) -> None:
    # Command-line entry point: registers the given local file as the "battery_cell_prices.xlsx" snapshot.
    # NOTE: Documented with comments rather than a docstring, since click shows the docstring as help text.
    snapshot_uri = f"benchmark_mineral_intelligence/{SNAPSHOT_VERSION}/battery_cell_prices.xlsx"

    # Copy local data file to snapshots data folder, add file to DVC and upload to S3.
    Snapshot(snapshot_uri).create_snapshot(filename=path_to_file, upload=upload)


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Learn more at:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
meta:
origin:
# Data product / Snapshot
title: Lithium ion batteries prices
description: |-
Historical prices of lithium ion battery cells.
date_published: "2024-11-26"

# Citation
producer: Benchmark Mineral Intelligence
citation_full: |-
Benchmark Mineral Intelligence - Lithium ion batteries prices (2024).

# Files
url_main: https://www.benchmarkminerals.com/lithium-ion-batteries/lithium-ion-batteries-prices/table?utm_source=Our%20World%20in%20Data&utm_medium=Website&utm_campaign=Battery%20cell%20data
date_accessed: 2024-11-29

# License
license:
name: © Benchmark Mineral Intelligence 2024
url: https://www.benchmarkminerals.com/lithium-ion-batteries/lithium-ion-batteries-prices/table

is_public: false

outs:
- md5: 525d53a612d1552b1ba4bfe480a9f1b8
size: 153096
path: battery_cell_prices.xlsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Script to create a snapshot of dataset."""

from pathlib import Path

import click

from etl.snapshot import Snapshot

# Version for current snapshot dataset.
# Taken from the name of the folder that contains this script.
SNAPSHOT_VERSION = Path(__file__).parent.name


@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
@click.option("--path-to-file", "-f", prompt=True, type=str, help="Path to local data file.")
def main(path_to_file: str, upload: bool) -> None:
    # Command-line entry point: registers the given local file as the "battery_cell_prices_by_chemistry.xlsx"
    # snapshot.
    # NOTE: Documented with comments rather than a docstring, since click shows the docstring as help text.
    snapshot_uri = f"benchmark_mineral_intelligence/{SNAPSHOT_VERSION}/battery_cell_prices_by_chemistry.xlsx"

    # Copy local data file to snapshots data folder, add file to DVC and upload to S3.
    Snapshot(snapshot_uri).create_snapshot(filename=path_to_file, upload=upload)


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Learn more at:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
meta:
origin:
# Data product / Snapshot
title: Batteries prices by chemistry
description: |-
Prices of different types of battery cells.
date_published: "2024-11-26"

# Citation
producer: Benchmark Mineral Intelligence
citation_full: |-
Benchmark Mineral Intelligence - Batteries prices by chemistry (2024).

# Files
url_main: https://www.benchmarkminerals.com/lithium-ion-batteries/lithium-ion-batteries-prices/table?utm_source=Our%20World%20in%20Data&utm_medium=Website&utm_campaign=Battery%20cell%20data
date_accessed: 2024-11-29

# License
license:
name: © Benchmark Mineral Intelligence 2024
url: https://www.benchmarkminerals.com/lithium-ion-batteries/lithium-ion-batteries-prices/table

is_public: false
outs:
- md5: 3ddaab5198d57080c72e87760df8b26a
size: 153127
path: battery_cell_prices_by_chemistry.xlsx
Loading