# SPDX-License-Identifier: EUPL-1.2
# Contact: felixd@gfz.de, besnard@gfz.de, urbazaev@gfz.de and amelia.holcomb@gmail.com.
# SPDX-FileCopyrightText: 2026 Felix Dombrowski
# SPDX-FileCopyrightText: 2026 Mikhail Urbazaev
# SPDX-FileCopyrightText: 2026 Simon Besnard
# SPDX-FileCopyrightText: 2026 Amelia Holcomb
# SPDX-FileCopyrightText: 2026 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences


"""
Querying Processed ICESat-2 Data from TileDB
============================================

This example demonstrates how to use the `IceSat2Provider` class to query and retrieve ICESat-2 data stored in TileDB arrays.
We will:

1. Set up the `IceSat2Provider` with a TileDB storage backend.
2. Query data using spatial and temporal filters.
3. Retrieve data in both `xarray.Dataset` and `pandas.DataFrame` formats.
4. Perform nearest-shot queries to retrieve ICESat-2 data closest to a given point.

Before running this example:

- Ensure that the ICESat-2 data has been processed and stored in TileDB arrays.
- Configure the TileDB storage settings (local or S3) based on your data location.

"""

import geopandas as gpd

import icesat2db as isdb

# Configure the TileDB storage backend
storage_type = "local"  # Options: "local" or "s3"
local_path = "/path/to/processed/icesat2/data"
s3_bucket = None  # Only required if using "s3"

# Initialize the IceSat2Provider
provider = isdb.IceSat2Provider(
    storage_type=storage_type,
    local_path=local_path,
    s3_bucket=s3_bucket,
)

# Define variables to retrieve
variables = ["latitude", "longitude", "rh100", "beam"]

# Define the spatial region of interest (ROI) as a GeoJSON file or GeoDataFrame
geojson_path = "/path/to/roi.geojson"
geometry = gpd.read_file(geojson_path)

# Define the temporal range
start_time = "2020-01-01"
end_time = "2020-12-31"

##############################################################
# -----------------------------------------------------------------
# Section 1: Bounding Box Query
# -----------------------------------------------------------------
print("=== Bounding Box Query ===")
print("Querying ICESat-2 data within a specified spatial and temporal range.")

# Query data within the bounding box and time range, and retrieve it as an `xarray.Dataset`
data_xarray = provider.get_data(
    variables=variables,
    geometry=geometry,
    start_time=start_time,
    end_time=end_time,
    return_type="xarray",
    query_type="bounding_box",
)

# Print the retrieved `xarray.Dataset`
print("Retrieved data as an xarray.Dataset:")
print(data_xarray)

##############################################################
# -----------------------------------------------------------------
# Section 2: Nearest Shot Query
# -----------------------------------------------------------------
print("\n=== Nearest Shot Query ===")
print("Querying ICESat-2 data for the nearest shots to a specific point.")

# Specify a geographic point (longitude, latitude) and the number of nearest shots
point = (-55.0, -10.0)  # Example longitude and latitude
num_shots = 5  # Retrieve the 5 nearest shots

# Query data for the nearest ICESat-2 shots
nearest_data = provider.get_data(
    variables=variables,
    point=point,
    num_shots=num_shots,
    query_type="nearest",
    return_type="xarray",
)

# Print the nearest ICESat-2 shots
print("Retrieved nearest ICESat-2 shots as an xarray.Dataset:")
print(nearest_data)

##############################################################
# -----------------------------------------------------------------
# Section 3: Data Format Options
# -----------------------------------------------------------------
print("\n=== Data Format Options ===")
print("Retrieving ICESat-2 data as a pandas.DataFrame for easier tabular analysis.")

# Query the data within the bounding box and retrieve it as a `pandas.DataFrame`
data_dataframe = provider.get_data(
    variables=variables,
    geometry=geometry,
    start_time=start_time,
    end_time=end_time,
    return_type="dataframe",
    query_type="bounding_box",
)

# Print the retrieved `pandas.DataFrame`
print("Retrieved data as a pandas.DataFrame:")
print(data_dataframe)
