Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

📊 WDI update #3767

Draft
wants to merge 13 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion apps/wizard/app_pages/chart_diff/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@

from apps.wizard.app_pages.chart_diff.chart_diff import get_chart_diffs_from_grapher
from apps.wizard.app_pages.chart_diff.chart_diff_show import st_show
from apps.wizard.app_pages.chart_diff.utils import WARN_MSG, get_engines
from apps.wizard.app_pages.chart_diff.utils import WARN_MSG, get_engines, indicators_in_charts
from apps.wizard.utils import set_states
from apps.wizard.utils.components import Pagination
from etl.config import OWID_ENV
from etl.grapher import model as gm

log = get_logger()

Expand Down Expand Up @@ -81,6 +82,9 @@ def get_chart_diffs():
)
)

# Get indicators used in charts
st.session_state.indicators_in_charts = indicators_in_charts(list(st.session_state.chart_diffs.keys()))

# Init, can be changed by the toggle
st.session_state.chart_diffs_filtered = st.session_state.chart_diffs

Expand All @@ -106,6 +110,16 @@ def _slugs_match(chart_slug_1, chart_slug_2):
# Filter based on query params
if "chart_id" in st.query_params:
chart_ids = list(map(int, st.query_params.get_all("chart_id")))
st.session_state.chart_diffs_filtered = {
k: v for k, v in st.session_state.chart_diffs_filtered.items() if v.chart_id in chart_ids
}
if "indicator_id" in st.query_params:
indicator_ids = list(map(int, st.query_params.get_all("indicator_id")))

# Get all charts containing any of the selected indicators
with Session(SOURCE_ENGINE) as session:
chart_ids = gm.ChartDimensions.chart_ids_with_indicators(session, indicator_ids)

st.session_state.chart_diffs_filtered = {
k: v for k, v in st.session_state.chart_diffs_filtered.items() if v.chart_id in chart_ids
}
Expand Down Expand Up @@ -189,6 +203,8 @@ def _apply_search_filters(session_key, query_key):

# Chart ID filter
_apply_search_filters("chart-diff-filter-id", "chart_id")
# Indicator filter
_apply_search_filters("chart-diff-filter-indicator", "indicator_id")
# Slug filter
_apply_search_filters("chart-diff-filter-slug", "chart_slug")
# Change type filter
Expand Down Expand Up @@ -237,6 +253,15 @@ def _apply_search_filters(session_key, query_key):
key="chart-diff-filter-slug",
help="Filter chart diffs with charts with slugs containing any of the given words (fuzzy match).",
)
st.multiselect(
label="Select indicators",
options=sorted(st.session_state.indicators_in_charts.keys()),
format_func=lambda s: f"[{s}] {st.session_state.indicators_in_charts[s]}",
default=[int(n) for n in st.query_params.get_all("indicator_id")], # type: ignore
key="chart-diff-filter-indicator",
help="Filter chart diffs to charts containing any of the selected indicators.",
placeholder="Select indicator IDs",
)

st.form_submit_button(
"Apply filters",
Expand Down
11 changes: 11 additions & 0 deletions apps/wizard/app_pages/chart_diff/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

import streamlit as st
from sqlalchemy.engine.base import Engine
from sqlalchemy.orm import Session
from structlog import get_logger

from etl import config
from etl.config import OWID_ENV, OWIDEnv
from etl.grapher import model as gm

log = get_logger()

Expand Down Expand Up @@ -40,3 +42,12 @@ def prettify_date(chart):
return chart.updatedAt.strftime("%b %d, %H:%M")
else:
return chart.updatedAt.strftime("%b %d, %Y %H:%M")


@st.cache_data
def indicators_in_charts(chart_ids: list[int]) -> dict[int, str]:
    """Map the ID of every indicator used by the given charts to its name.

    Opens a session on ``SOURCE.engine``, collects the indicator IDs with
    ``gm.ChartDimensions.indicators_in_charts`` and then loads only the ``id``
    and ``name`` columns through ``gm.Variable.from_id``. The result is cached
    by ``st.cache_data``.
    """
    with Session(SOURCE.engine) as session:
        used_ids = list(gm.ChartDimensions.indicators_in_charts(session, chart_ids))
        variables = gm.Variable.from_id(session, variable_id=used_ids, columns=["id", "name"])
        # Build the mapping while the session is still open.
        id_to_name = {variable.id: variable.name for variable in variables}  # type: ignore
    return id_to_name  # type: ignore
15 changes: 15 additions & 0 deletions dag/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ steps:
- data://garden/news/2024-05-23/gdelt_v2

# World Development Indicators - WDI
# TO BE ARCHIVED
data://meadow/worldbank_wdi/2024-05-20/wdi:
- snapshot://worldbank_wdi/2024-05-20/wdi.zip

Expand All @@ -518,6 +519,20 @@ steps:
data://grapher/worldbank_wdi/2024-05-20/wdi:
- data://garden/worldbank_wdi/2024-05-20/wdi

# World Development Indicators - WDI
data://meadow/worldbank_wdi/2025-01-24/wdi:
- snapshot://worldbank_wdi/2025-01-24/wdi.zip

data://garden/worldbank_wdi/2025-01-24/wdi:
- snapshot://worldbank_wdi/2025-01-24/wdi.zip
- data://meadow/worldbank_wdi/2025-01-24/wdi
- data://garden/demography/2024-07-15/population
- data://garden/regions/2023-01-01/regions
- data://garden/wb/2024-07-29/income_groups

data://grapher/worldbank_wdi/2025-01-24/wdi:
- data://garden/worldbank_wdi/2025-01-24/wdi

#
# Aviation Safety Network - Aviation Statistics.
#
Expand Down
25 changes: 21 additions & 4 deletions etl/grapher/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1401,10 +1401,12 @@ def from_catalog_path(
) -> "Variable" | List["Variable"]:
"""Load a variable from the DB by its catalog path."""
assert "#" in catalog_path, "catalog_path should end with #indicator_short_name"
# Return Variable if columns is None and return Row object if columns is provided
execute = session.execute if columns else session.scalars
if isinstance(catalog_path, str):
return session.scalars(_select_columns(cls, columns).where(cls.catalogPath == catalog_path)).one()
return execute(_select_columns(cls, columns).where(cls.catalogPath == catalog_path)).one() # type: ignore
elif isinstance(catalog_path, list):
return session.scalars(_select_columns(cls, columns).where(cls.catalogPath.in_(catalog_path))).all() # type: ignore
return execute(_select_columns(cls, columns).where(cls.catalogPath.in_(catalog_path))).all() # type: ignore

@overload
@classmethod
Expand All @@ -1421,10 +1423,13 @@ def from_id(
cls, session: Session, variable_id: int | List[int], columns: Optional[List[str]] = None
) -> "Variable" | List["Variable"]:
"""Load a variable (or list of variables) from the DB by its ID path."""
# Return Variable if columns is None and return Row object if columns is provided
execute = session.execute if columns else session.scalars

if isinstance(variable_id, int):
return session.scalars(_select_columns(cls, columns).where(cls.id == variable_id)).one()
return execute(_select_columns(cls, columns).where(cls.id == variable_id)).one() # type: ignore
elif isinstance(variable_id, list):
return session.scalars(_select_columns(cls, columns).where(cls.id.in_(variable_id))).all() # type: ignore
return execute(_select_columns(cls, columns).where(cls.id.in_(variable_id))).all() # type: ignore

@classmethod
def catalog_paths_to_variable_ids(cls, session: Session, catalog_paths: List[str]) -> Dict[str, int]:
Expand Down Expand Up @@ -1552,6 +1557,18 @@ class ChartDimensions(Base):
createdAt: Mapped[datetime] = mapped_column(DateTime, server_default=text("CURRENT_TIMESTAMP"), init=False)
updatedAt: Mapped[Optional[datetime]] = mapped_column(DateTime, init=False)

@classmethod
def chart_ids_with_indicators(cls, session: Session, indicator_ids: list[int]) -> list[int]:
    """Return the IDs of charts that use any of the given indicators.

    Each chart ID appears at most once, even when a chart uses several of the
    requested indicators (the table is queried with ``DISTINCT``).

    Args:
        session: Open database session used to run the query.
        indicator_ids: Indicator (variable) IDs to look for.

    Returns:
        List of unique chart IDs; empty when no chart matches.
    """
    # One row exists per (chart, dimension), so without DISTINCT a chart that
    # uses multiple requested indicators would be returned multiple times.
    query = select(cls.chartId).where(cls.variableId.in_(indicator_ids)).distinct()
    return list(session.scalars(query).all())

@classmethod
def indicators_in_charts(cls, session: Session, chart_ids: list[int]) -> set[int]:
    """Return the set of indicator IDs used by any of the given charts."""
    in_requested_charts = cls.chartId.in_(chart_ids)
    stmt = select(cls.variableId).where(in_requested_charts)
    found_ids = session.scalars(stmt).all()
    return set(found_ids)


class Origin(Base):
"""Get CREATE TABLE statement for origins table with
Expand Down
12 changes: 9 additions & 3 deletions etl/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,8 @@ def get_dependency_step_name(
short_name=short_name,
is_private=is_private,
)
matches = [dependency for dependency in self.dependencies if bool(re.match(pattern, dependency))]
deps = self.dependencies
matches = _match_dependencies(pattern, deps)

# If no step was found and is_private was not specified, try again assuming step is private.
if (len(matches) == 0) and (is_private is None):
Expand All @@ -700,7 +701,7 @@ def get_dependency_step_name(
short_name=short_name,
is_private=True,
)
matches = [dependency for dependency in self.dependencies if bool(re.match(pattern, dependency))]
matches = _match_dependencies(pattern, self.dependencies)

# If no step was found and channel is "grapher", try again assuming this is a grapher://grapher step.
if (len(matches) == 0) and (channel == "grapher"):
Expand All @@ -712,7 +713,7 @@ def get_dependency_step_name(
short_name=short_name,
is_private=is_private,
)
matches = [dependency for dependency in self.dependencies if bool(re.match(pattern, dependency))]
matches = _match_dependencies(pattern, self.dependencies)

if len(matches) == 0:
raise NoMatchingStepsAmongDependencies(step_name=self.step_name)
Expand Down Expand Up @@ -798,6 +799,11 @@ def load_mdim_config(self, filename: Optional[str] = None, path: Optional[str |
return config


def _match_dependencies(pattern: str, dependencies: List[str]) -> List[str]:
regex = re.compile(pattern)
return [dependency for dependency in dependencies if regex.match(dependency)]


def print_tables_metadata_template(tables: List[Table], fields: Optional[List[str]] = None) -> None:
# This function is meant to be used when creating code in an interactive window (or a notebook).
# It prints a template for the metadata of the tables in the list.
Expand Down
Loading
Loading