Downloads the odc-colab Python module and runs it to setup ODC.
!wget -nc https://raw.githubusercontent.com/ceos-seo/odc-colab/master/odc_colab.py
from odc_colab import odc_colab_init
odc_colab_init(install_odc_gee=True)
Downloads an existing index and populates the new ODC environment with it.
from odc_colab import populate_db
populate_db()
This notebook explores sample Sentinel-2 data from Google Earth Engine which has been indexed to work with the Open Data Cube. The data is used to calculate cloud statistics for selected regions. This is valuable information for performing analysis. For example, if there are extensive clouds for a season it may significantly impact the mosaic product or index values. Another example is that a user may want to find a single date when there are few clouds to assess land features.
Look for the "MODIFY HERE" tags to identify code blocks for user editing. These areas will allow the user to change the region, time window and output time slice.
# Load Data Cube Configuration
from odc_gee import earthengine
dc = earthengine.Datacube(app='Cloud_Statistics')
# Import Data Cube API
import utils.data_cube_utilities.data_access_api as dc_api
api = dc_api.DataAccessApi()
# Import Utilities
from utils.data_cube_utilities.dc_display_map import display_map
from utils.data_cube_utilities.dc_rgb import rgb
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Select a Product and Platform
# This data is indexed from Google Earth Engine data sources
product = "s2_google"
platform = "Sentinel-2A"
# MODIFY HERE
# Select a Latitude-Longitude point for the center of the analysis region
# Select the size of the box (in degrees) surrounding the center point
# Mombasa, Kenya
lat_long = (-4.03, 39.62)
box_size_deg = 0.15
# Calculate the latitude and longitude bounds of the analysis box
latitude = (lat_long[0]-box_size_deg/2, lat_long[0]+box_size_deg/2)
longitude = (lat_long[1]-box_size_deg/2, lat_long[1]+box_size_deg/2)
# Select a time range
# The inputs require a format (Min,Max) using this date format (YYYY-MM-DD)
# The Sentinel-2 allowable time range is: 2017-03-28 to current
time_extents = ('2021-01-01', '2021-03-01')
# Display the analysis region
# Click on the plot to get Lat-Lon coordinates to adjust the region
# Zoom in/out on the plot to move around the globe for other regions
display_map(latitude,longitude)
#@title Build Cloud Coverage Table
# Build Cloud Coverage Table
def build_cloud_coverage_table_sentinel(product,
platform,
latitude,
longitude,
time=None,
dc=None,
extra_band='green'):
load_params = dict(product = product,
platform = platform,
latitude = latitude,
longitude = longitude,
group_by='solar_day',
measurements = [extra_band, 'scl'])
if time is not None:
load_params["time"] = time
geo_data = dc.load(**load_params)
times = list(geo_data.time.values)
scene_slice_list = list(map(lambda t: geo_data.sel(time = t), times))
clean_mask_list = (geo_data.scl.values != 0) & (geo_data.scl.values != 1) & \
(geo_data.scl.values != 3) & (geo_data.scl.values != 8) & \
(geo_data.scl.values != 9) & (geo_data.scl.values != 10)
percentage_list = [clean_mask.mean()*100 for clean_mask in clean_mask_list]
clean_pixel_count_list = list(map(np.sum, clean_mask_list))
data = {"times": times,
# "clean_percentage" is the percent of pixels that are not no_data which are clear.
"clean_percentage": percentage_list,
"clean_count": clean_pixel_count_list }
return geo_data, pd.DataFrame(data=data, columns = ["times", "clean_percentage", "clean_count"])
# Load the data and calculate the cloud coverage for each time slice
sentinel_dataset, coverage_table = build_cloud_coverage_table_sentinel(product = product,
platform = platform,
latitude = latitude,
longitude = longitude,
time = time_extents,
dc = dc,
extra_band = 'green')
This table displays each time slice in the cube (starting at ZERO). The "clean percentage" is the percent of pixels WITHOUT clouds. So, low numbers are cloudy scenes and high numbers are clearer scenes. The "clean count" is the number of clear pixels in the scene.
Typically, there is a separation of 5 days between Sentinel-2 scenes for a single location. This considers the availability of two missions (A and B) which is the case for most places in the world. When there is significant cloud cover, scenes may be missing from time series due to issues with processing and geolocation.
pd.set_option('display.max_rows', len(coverage_table))
coverage_table
times | clean_percentage | clean_count | |
---|---|---|---|
0 | 2021-01-05 07:51:04.424 | 7.001867 | 195392 |
1 | 2021-01-10 07:51:03.306 | 0.000000 | 0 |
2 | 2021-01-15 07:51:04.809 | 13.699961 | 382307 |
3 | 2021-01-20 07:51:03.805 | 0.583322 | 16278 |
4 | 2021-01-25 07:51:04.511 | 17.278477 | 482168 |
5 | 2021-01-30 07:51:03.678 | 1.728357 | 48231 |
6 | 2021-02-04 07:51:03.564 | 94.684276 | 2642231 |
7 | 2021-02-09 07:51:02.833 | 0.000000 | 0 |
8 | 2021-02-14 07:51:02.832 | 46.424171 | 1295499 |
9 | 2021-02-19 07:51:01.594 | 91.474717 | 2552666 |
10 | 2021-02-24 07:51:03.656 | 41.204772 | 1149848 |
11 | 2021-03-01 07:51:02.777 | 71.671092 | 2000032 |
plt.figure(figsize = (15,5))
plt.plot(coverage_table["times"].values, coverage_table["clean_percentage"].values, 'bo', markersize=8)
plt.show()
# Load the data to create an RGB image
sentinel_dataset = dc.load(latitude = latitude,
longitude = longitude,
platform = platform,
time = time_extents,
product = product,
group_by='solar_day',
measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2'])
# MODIFY HERE
# Select one of the time slices and create an RGB image.
# Time slices are numbered from 0 to x and shown in the table above
# Review the clean_percentage values above to select scenes with few clouds
# Clouds will be visible in WHITE for an RGB image
# RGB image options
# True-Color RGB = Red, Green, Blue
# False Color RGB (Mosaic) = SWIR2, NIR, Green
slice = 6 # select the time slice number here
fig, ax = plt.subplots(1, 2, figsize=(16, 8))
true_rgb = sentinel_dataset.isel(time=slice)[['red', 'green', 'blue']].to_array()
false_rgb = sentinel_dataset.isel(time=slice)[['swir2', 'nir', 'green']].to_array()
true_rgb.plot.imshow(ax=ax[0], vmin=0, vmax=2000)
false_rgb.plot.imshow(ax=ax[1], vmin=0, vmax=5000)
ax[0].set_title('True Color'), ax[0].xaxis.set_visible(False), ax[0].yaxis.set_visible(False)
ax[1].set_title('False Color'), ax[1].xaxis.set_visible(False), ax[1].yaxis.set_visible(False)
plt.show()