batch image correlation 1 connections #10
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow: batch-process many image pairs.
# `name` is the workflow's identifier; `run-name` labels each individual run
# with the number of pairs per image that was requested for it.
name: batch_image_correlation
run-name: 'batch image correlation ${{ inputs.npairs }} connections'
on:
  # Manual trigger. Every input is required and ships with a default.
  workflow_dispatch:
    inputs:
      cloud_cover:
        description: 'percent cloud cover allowed in images (0-100)'
        type: string
        default: '10'
        required: true
      start_month:
        description: 'first month of year to search for images'
        type: choice
        options: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']
        default: '6'
        required: true
      stop_month:
        description: 'last month of year to search for images'
        type: choice
        options: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']
        default: '9'
        required: true
      npairs:
        description: 'number of pairs per image'
        type: choice
        options: ['3', '2', '1']
        default: '1'
        required: true

  # Reusable-workflow trigger. The inputs have to be declared a second time
  # for workflow_call (https://github.com/orgs/community/discussions/39357).
  # Here they are all plain strings with no defaults, so the caller must
  # supply each one.
  workflow_call:
    inputs:
      cloud_cover:
        required: true
        type: string
      start_month:
        required: true
        type: string
      stop_month:
        required: true
        type: string
      npairs:
        required: true
        type: string
jobs:
  # Searches the Element84 Earth Search STAC catalog for Sentinel-2 L2A scenes
  # intersecting a hard-coded polygon, keeps the first scene of each month in
  # the requested month range, and pairs each scene with the following ones.
  # The output of this job is a JSON mapping for a matrix job.
  S2_search:
    runs-on: ubuntu-latest
    outputs:
      BURST_IDS: ${{ steps.S2_search.outputs.IMAGE_DATES }}
      MATRIX: ${{ steps.S2_search.outputs.MATRIX_PARAMS_COMBINATIONS }}
    defaults:
      run:
        shell: bash -el {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Install Conda environment with Micromamba
        uses: mamba-org/setup-micromamba@v1
        with:
          cache-environment: true
          environment-file: glacier_image_correlation/environment.yml

      # The step body is a Python script, run through a custom shell:
      # https://words.yuvi.in/post/python-in-github-actions/
      - name: Search aws for S2 imagery
        id: S2_search
        shell: bash -el -c "python -u {0}"
        # Inputs are handed to the script as environment variables rather than
        # being expanded into the Python source, so a crafted input value can
        # never be executed as code; the script parses them as numbers.
        env:
          CLOUD_COVER: ${{ inputs.cloud_cover }}
          START_MONTH: ${{ inputs.start_month }}
          STOP_MONTH: ${{ inputs.stop_month }}
          NPAIRS: ${{ inputs.npairs }}
        run: |
          import xarray as xr
          import os
          import pystac
          import pystac_client
          import stackstac
          from dask.distributed import Client
          import dask
          import json
          import pandas as pd

          # Parse and validate the workflow inputs (always delivered as strings)
          try:
              cloud_cover = float(os.environ['CLOUD_COVER'])
              start_month = int(os.environ['START_MONTH'])
              stop_month = int(os.environ['STOP_MONTH'])
              npairs = int(os.environ['NPAIRS'])
          except ValueError as err:
              raise SystemExit(f'invalid workflow input: {err}')
          if not 0 <= cloud_cover <= 100:
              raise SystemExit(f'cloud_cover must be within 0-100, got {cloud_cover}')
          if not (1 <= start_month <= 12 and 1 <= stop_month <= 12):
              raise SystemExit(f'months must be within 1-12, got {start_month} and {stop_month}')
          if start_month > stop_month:
              raise SystemExit(f'start_month ({start_month}) must not be after stop_month ({stop_month})')
          if npairs < 1:
              raise SystemExit(f'npairs must be at least 1, got {npairs}')

          # GDAL environment variables for better performance
          os.environ['AWS_REGION']='us-west-2'
          os.environ['GDAL_DISABLE_READDIR_ON_OPEN']='EMPTY_DIR'
          os.environ['AWS_NO_SIGN_REQUEST']='YES'

          # hardcode bbox for now
          bbox = {
              "type": "Polygon",
              "coordinates": [
                  [[75.42382800808971,36.41082887114753],
                   [75.19442677164156,36.41082887114753],
                   [75.19442677164156,36.201076360872946],
                   [75.42382800808971,36.201076360872946],
                   [75.42382800808971,36.41082887114753]]]
          }

          # Use the api from element84 to query the data
          URL = "https://earth-search.aws.element84.com/v1"
          catalog = pystac_client.Client.open(URL)
          search = catalog.search(
              collections=["sentinel-2-l2a"],
              intersects=bbox,
              query={"eo:cloud_cover": {"lt": cloud_cover}}
          )

          # Check how many items were returned
          items = search.item_collection()
          print(f"Returned {len(items)} Items")
          if len(items) == 0:
              raise SystemExit('no Sentinel-2 items matched the search; nothing to correlate')

          # create xarray dataset without loading data
          sentinel2_stack = stackstac.stack(items)

          # filter to specified month range (both ends inclusive)
          months = sentinel2_stack.time.dt.month
          sentinel2_stack_snowoff = sentinel2_stack.where((months >= start_month) & (months <= stop_month), drop=True)

          # select first image of each month
          period_index = pd.PeriodIndex(sentinel2_stack_snowoff['time'].values, freq='M')
          sentinel2_stack_snowoff.coords['year_month'] = ('time', period_index)
          first_image_indices = sentinel2_stack_snowoff.groupby('year_month').apply(lambda x: x.isel(time=0))
          product_names = first_image_indices['s2:product_uri'].values.tolist()
          print('\n'.join(product_names))

          # Create Matrix Job Mapping (JSON Array).
          # Each image r is paired with the npairs images that follow it.
          # NOTE: r stops npairs short of the end, so the final npairs images
          # are only ever used as img2, never as img1.
          pairs = []
          for r in range(len(product_names) - npairs):
              for s in range(1, npairs + 1):
                  img1_product_name = product_names[r]
                  img2_product_name = product_names[r+s]
                  # characters 11-18 of each product URI -- presumably the
                  # acquisition date (YYYYMMDD); TODO confirm
                  shortname = f'{img1_product_name[11:19]}_{img2_product_name[11:19]}'
                  pairs.append({'img1_product_name': img1_product_name, 'img2_product_name': img2_product_name, 'name':shortname})
          print(f'number of image pairs: {len(pairs)}')

          # An empty "include" list cannot be expanded into a job matrix, so
          # stop here with a readable message instead of failing downstream.
          if not pairs:
              raise SystemExit(f'found {len(product_names)} monthly images, not enough to form pairs with npairs={npairs}')
          matrixJSON = json.dumps({'include': pairs})

          # Publish the step outputs, one key=value line each
          with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
              print(f'IMAGE_DATES={product_names}', file=f)
              print(f'MATRIX_PARAMS_COMBINATIONS={matrixJSON}', file=f)
# Matrix job: calls the reusable pair workflow once per entry of the JSON
# matrix published by S2_search. fail-fast is disabled on the matrix.
  autoRIFT:
    name: ${{ matrix.name }}
    needs:
      - S2_search
    uses: ./.github/workflows/image_correlation_pair.yml
    with:
      workflow_name: ${{ matrix.name }}
      img1_product_name: ${{ matrix.img1_product_name }}
      img2_product_name: ${{ matrix.img2_product_name }}
    strategy:
      matrix: ${{ fromJson(needs.S2_search.outputs.MATRIX) }}
      fail-fast: false
# Calls the summary-statistics reusable workflow; it declares both the search
# job and the matrix job as prerequisites through `needs`.
  summary_statistics:
    name: summary_statistics
    uses: ./.github/workflows/summary_statistics.yml
    needs:
      - S2_search
      - autoRIFT