# Example: Export a large collection to Google Drive
from gee_polygons import SiteCollection
from gee_polygons.datasets.mapbiomas import MAPBIOMAS_LULC
# NOTE(review): ExportDestination and ExportConfig were used below without
# being imported, which raises NameError when the example is run. The module
# path is assumed from the "export" API page documenting them - confirm.
from gee_polygons.export import ExportConfig, ExportDestination

# Load 40,000 sites in lazy mode
sites = SiteCollection.from_geojson('all_sites.geojson', lazy=True)

# Configure export: target Drive folder and the prefix for output filenames
destination = ExportDestination(
    type='drive',
    folder='restoration_extractions',
    file_prefix='lulc_2024',
)
config = ExportConfig(
    chunk_size=50,  # 50 sites per task
    max_concurrent=15,  # Run 15 tasks at once
)

# Submit export (creates ~800 tasks: 40,000 sites / 50 sites per task)
# NOTE(review): range() excludes its end value, so this requests 2010-2023.
# If 2024 is intended (as the 'lulc_2024' prefix suggests), use
# range(2010, 2025) - confirm.
task = sites.export_categorical(
    layer=MAPBIOMAS_LULC,
    years=range(2010, 2024),
    destination=destination,
    config=config,
)
print(f"Submitted {len(task.task_ids)} export tasks")

# Monitor progress: wait for the tasks to finish (timeout given in minutes)
task.wait(timeout_minutes=180)

# Check results
print(task.summary())
print(task.results_info())

export
ExportDestination
Configuration for where to export results.
ExportDestination
def ExportDestination(
type:Literal['drive', 'cloud_storage'], folder:str, file_prefix:str='extraction',
file_format:Literal['CSV', 'GeoJSON']='CSV'
)->None:
Configuration for export destination.
Attributes:
- type: 'drive' for Google Drive, 'cloud_storage' for GCS
- folder: Drive folder name or GCS bucket/prefix
- file_prefix: Prefix for output filenames (default 'extraction')
- file_format: Output format - 'CSV' or 'GeoJSON' (default 'CSV')
ExportConfig
Configuration for export behavior.
ExportConfig
def ExportConfig(
chunk_size:int=50, max_concurrent:int=10, description_prefix:str='gee_polygons_export'
)->None:
Configuration for export behavior.
Attributes:
- chunk_size: Number of sites per export task (default 50)
- max_concurrent: Maximum concurrent tasks (default 10)
- description_prefix: Prefix for GEE task descriptions
ExportTask
Handle to running or completed export tasks. Provides methods to monitor progress, wait for completion, and retrieve results.
ExportTask
def ExportTask(
task_ids:List[str], destination:ExportDestination, config:ExportConfig, chunk_mapping:Dict[str, tuple]=<factory>
)->None:
Handle to running or completed export tasks.
Provides methods to monitor progress, wait for completion, and retrieve results information.
Attributes:
- task_ids: List of GEE task IDs
- destination: Export destination configuration
- config: Export configuration used
- chunk_mapping: Mapping of task_id to (start_idx, end_idx) tuples