# Load Libraries
import pandas as pd
import geopandas as gpd
import planetary_computer
import pystac_client
import rich.table
from geogif import gif
import numpy as np
import matplotlib.pyplot as plt
import rioxarray as rioxr
from IPython.display import Image
from shapely.geometry import box
import xarray as xr
import os
import rasterio
from rasterio.windows import from_bounds
from IPython.display import Image
import contextily as ctx
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
About:
Purpose: In 2021, Maricopa County —home to the Phoenix metropolitan area— was identified as the U.S. county with the most significant increase in developed land since 2001. This rapid urban sprawl has profound implications for biodiversity and the health of surrounding natural ecosystems.
In this notebook, I will investigate the impacts of urban expansion by analyzing a dataset that captures values for the Biodiversity Intactness Index (BII). Apecifically, I will examine changes in BII in the Phoenix county subdivision area between 2017 and 2020, shedding light on how urban growth affects biodiversity over time.
Highlights: 1. Accessing the Microsoft Planetary Computer to Access their Impact Observatory data.
Creating a plot of the Biodiversity Intactness Index within the Phoenix subdivision polygon.
Calculate the percentage of area of the Phoenix area with a BII index of at least .07 in 2017 and 2020.
Plotting the 2020 data, revealing areas that show a loss of biodiversity.
About the data: 1. The first data set is is the Biodiversity Intactness Index (BII) Time Series. Access the io-biodiversity collection from the Microsoft Planetary Computer STAC catalog. I will be using the 2017 and 2020 rasters covering the Phoenix subdivision.
- The second data set is the Phoenix Subdivision Shapefile Download the Phoenix subdivision polygon from the Census County Subdivision shapefiles for Arizona. All legal boundaries and names are as of January 1, 2024. The 2024 TIGER/Line Shapefiles were released on September 25, 2024. https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2022&layergroup=County+Subdivisions
Citations and Data Access Links:
C. Galaz García, EDS 220 - Working with Environmental Datasets, Course Notes. 2024. [Online]. Available: https://meds-eds-220.github.io/MEDS-eds-220-course/book/preface.html
Microsoft Planetary Computer, STAC Catalog. Biodiversity Intactness (‘io-biodiversity’). [Dataset]. https://planetarycomputer.microsoft.com/dataset/io-biodiversity Accessed 2 December 2024.
United States Census Bureau. (2022). Arizona County Subdivision 2022 TIGER/Line Shapefiles. [Data File]. U.S. Census Bureau, Geography Division. https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2022&layergroup=County+Subdivisions Accessed 2 December 2024.
Both of these datasets were accessed for this analysis on 12/2/2024.
To begin the analysis, I will first read in and explore the shapefile data that I have for Arizona, specifically looking for the Phoenix area.
# Read in shapefile data for Arizona
= gpd.read_file('data/tl_2022_04_cousub.shp')
arizona 3) arizona.head(
STATEFP | COUNTYFP | COUSUBFP | COUSUBNS | GEOID | NAME | NAMELSAD | LSAD | CLASSFP | MTFCC | CNECTAFP | NECTAFP | NCTADVFP | FUNCSTAT | ALAND | AWATER | INTPTLAT | INTPTLON | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 04 | 005 | 91198 | 01934931 | 0400591198 | Flagstaff | Flagstaff CCD | 22 | Z5 | G4040 | None | None | None | S | 12231052883 | 44653332 | +35.1066114 | -111.3662497 | POLYGON ((-112.13370 35.85596, -112.13368 35.8... |
1 | 04 | 005 | 91838 | 01934953 | 0400591838 | Kaibab Plateau | Kaibab Plateau CCD | 22 | Z5 | G4040 | None | None | None | S | 7228864534 | 29327221 | +36.5991097 | -112.1368033 | POLYGON ((-112.66039 36.53941, -112.66033 36.5... |
2 | 04 | 005 | 91683 | 01934950 | 0400591683 | Hualapai | Hualapai CCD | 22 | Z5 | G4040 | None | None | None | S | 2342313339 | 3772690 | +35.9271665 | -113.1170408 | POLYGON ((-113.35416 36.04097, -113.35416 36.0... |
# Clean up Arizona data
= arizona.columns.str.lower()
arizona.columns
# Select just the Phoenix area
= arizona[arizona.name == "Phoenix"]
phoenix
# Generate a quick plot of the Phoenix shape
phoenix.plot()
When making maps it’s important to know what Coordinate Reference System we are dealing with. Let’s check what we have for the Phoenix file:
phoenix.crs
<Geographic 2D CRS: EPSG:4269>
Name: NAD83
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: North America - onshore and offshore: Canada - Alberta; British Columbia; Manitoba; New Brunswick; Newfoundland and Labrador; Northwest Territories; Nova Scotia; Nunavut; Ontario; Prince Edward Island; Quebec; Saskatchewan; Yukon. Puerto Rico. United States (USA) - Alabama; Alaska; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Hawaii; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming. US Virgin Islands. British Virgin Islands.
- bounds: (167.65, 14.92, -40.73, 86.45)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich
Now that I have my Arizona data looking how I want it, lets access the Microsoft Planetary Computer!
# Access the MPC catalog
= pystac_client.Client.open(
catalog "https://planetarycomputer.microsoft.com/api/stac/v1",
=planetary_computer.sign_inplace,
modifier
)
catalog.get_collections()= list(catalog.get_collections())
collections
# Print the number of collections
print('Number of collections:', len(collections))
#Pull out the Impact Observatory collection
= catalog.get_child('io-biodiversity')
io_collection io_collection
Number of collections: 124
- type "Collection"
- id "io-biodiversity"
- stac_version "1.0.0"
- description "Generated by [Impact Observatory](https://www.impactobservatory.com/), in collaboration with [Vizzuality](https://www.vizzuality.com/), these datasets estimate terrestrial Biodiversity Intactness as 100-meter gridded maps for the years 2017-2020. Maps depicting the intactness of global biodiversity have become a critical tool for spatial planning and management, monitoring the extent of biodiversity across Earth, and identifying critical remaining intact habitat. Yet, these maps are often years out of date by the time they are available to scientists and policy-makers. The datasets in this STAC Collection build on past studies that map Biodiversity Intactness using the [PREDICTS database](https://onlinelibrary.wiley.com/doi/full/10.1002/ece3.2579) of spatially referenced observations of biodiversity across 32,000 sites from over 750 studies. The approach differs from previous work by modeling the relationship between observed biodiversity metrics and contemporary, global, geospatial layers of human pressures, with the intention of providing a high resolution monitoring product into the future. Biodiversity intactness is estimated as a combination of two metrics: Abundance, the quantity of individuals, and Compositional Similarity, how similar the composition of species is to an intact baseline. Linear mixed effects models are fit to estimate the predictive capacity of spatial datasets of human pressures on each of these metrics and project results spatially across the globe. These methods, as well as comparisons to other leading datasets and guidance on interpreting results, are further explained in a methods [white paper](https://ai4edatasetspublicassets.blob.core.windows.net/assets/pdfs/io-biodiversity/Biodiversity_Intactness_whitepaper.pdf) entitled “Global 100m Projections of Biodiversity Intactness for the years 2017-2020.” All years are available under a Creative Commons BY-4.0 license. "
links[] 7 items
0
- rel "items"
- href "https://planetarycomputer.microsoft.com/api/stac/v1/collections/io-biodiversity/items"
- type "application/geo+json"
1
- rel "root"
- href "https://planetarycomputer.microsoft.com/api/stac/v1/"
- type "application/json"
- title "Microsoft Planetary Computer STAC API"
2
- rel "license"
- href "https://creativecommons.org/licenses/by/4.0/"
- type "text/html"
- title "CC BY 4.0"
3
- rel "about"
- href "https://ai4edatasetspublicassets.blob.core.windows.net/assets/pdfs/io-biodiversity/Biodiversity_Intactness_whitepaper.pdf"
- type "application/pdf"
- title "Technical White Paper"
4
- rel "describedby"
- href "https://planetarycomputer.microsoft.com/dataset/io-biodiversity"
- type "text/html"
- title "Human readable dataset overview and reference"
5
- rel "self"
- href "https://planetarycomputer.microsoft.com/api/stac/v1/collections/io-biodiversity"
- type "application/json"
6
- rel "parent"
- href "https://planetarycomputer.microsoft.com/api/stac/v1"
- type "application/json"
- title "Microsoft Planetary Computer STAC API"
stac_extensions[] 3 items
- 0 "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json"
- 1 "https://stac-extensions.github.io/raster/v1.1.0/schema.json"
- 2 "https://stac-extensions.github.io/table/v1.2.0/schema.json"
item_assets
data
- type "image/tiff; application=geotiff; profile=cloud-optimized"
roles[] 1 items
- 0 "data"
- title "Biodiversity Intactness"
- description "Terrestrial biodiversity intactness at 100m resolution"
raster:bands[] 1 items
0
- sampling "area"
- data_type "float32"
- spatial_resolution 100
- msft:region "westeurope"
- msft:container "impact"
- msft:storage_account "pcdata01euw"
- msft:short_description "Global terrestrial biodiversity intactness at 100m resolution for years 2017-2020"
- title "Biodiversity Intactness"
extent
spatial
bbox[] 1 items
0[] 4 items
- 0 -180
- 1 -90
- 2 180
- 3 90
temporal
interval[] 1 items
0[] 2 items
- 0 "2017-01-01T00:00:00Z"
- 1 "2020-12-31T23:59:59Z"
- license "CC-BY-4.0"
keywords[] 2 items
- 0 "Global"
- 1 "Biodiversity"
providers[] 3 items
0
- name "Impact Observatory"
roles[] 3 items
- 0 "processor"
- 1 "producer"
- 2 "licensor"
- url "https://www.impactobservatory.com/"
1
- name "Vizzuality"
roles[] 1 items
- 0 "processor"
- url "https://www.vizzuality.com/"
2
- name "Microsoft"
roles[] 1 items
- 0 "host"
- url "https://planetarycomputer.microsoft.com"
summaries
version[] 1 items
- 0 "v1"
assets
thumbnail
- href "https://ai4edatasetspublicassets.blob.core.windows.net/assets/pc_thumbnails/io-biodiversity-thumb.png"
- title "Biodiversity Intactness"
- media_type "image/png"
geoparquet-items
- href "abfs://items/io-biodiversity.parquet"
- type "application/x-parquet"
- title "GeoParquet STAC items"
- description "Snapshot of the collection's STAC items exported to GeoParquet format."
msft:partition_info
- is_partitioned False
table:storage_options
- account_name "pcstacitems"
roles[] 1 items
- 0 "stac-items"
Now that we have our data, we need to specify the temporal and spatial information we are interested in. I will create a bounding box, that specifies our spatial area of interest.
As we can see from the description of the Impact Observatory Collection above, the only dates represented in the data are our time range of interest (2017-2020). As such, there is no need to specify a temporal variable.
# Create bounding box
= [-112.826843, 32.974108, -111.184387, 33.863574] bbox_of_interest
# Catalog search
= catalog.search(
search = ['io-biodiversity'],
collections = bbox_of_interest)
bbox
# Get items from search
= search.item_collection()
items
# Determine number of items in search
print(f'There are {len(items)} items in the search.')
There are 4 items in the search.
# Retrieve items
= {item.id : item for item in search.items()}
item_names list(item_names) # from this list, we can see that our 1st item is 2020 data, and our 4th item is 2017 data
# Select 2017 subset
= items[3]
phx_2017
# Select 2020 subset
= items[0] phx_2020
Our search returned four STAC Items. We can tell from their IDs that that they contain data for the same area but for different times, specifically the years 2017 through 2020. Let’s display the available assets and properties for the 2017 Item.
= rich.table.Table("Asset Key", "Asset Title")
asset_table for key, value in items[-1].assets.items():
asset_table.add_row(key, value.title) asset_table
┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Asset Key ┃ Asset Title ┃ ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ data │ Biodiversity Intactness │ │ tilejson │ TileJSON with default rendering │ │ rendered_preview │ Rendered preview │ └──────────────────┴─────────────────────────────────┘
= rich.table.Table("Property Name", "Property Value")
property_table for key, value in sorted(items[-1].properties.items()):
str(value))
property_table.add_row(key, property_table
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Property Name ┃ Property Value ┃ ┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ datetime │ None │ │ end_datetime │ 2017-12-31T23:59:59Z │ │ proj:epsg │ 4326 │ │ proj:shape │ [7992, 7992] │ │ proj:transform │ [0.0008983152841195215, 0.0, -115.38597824385106, 0.0, -0.0008983152841195215, │ │ │ 34.74464974521749, 0.0, 0.0, 1.0] │ │ start_datetime │ 2017-01-01T00:00:00Z │ └────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────┘
Let’s look at a raster.
We can see from the tables, that the Planetary Computer includes a “Rendered Preview”. Let’s take a look at the 2017 data:
# Plot rendered preview
=phx_2017.assets['rendered_preview'].href, width=500) Image(url

Visualization: Map of Phoenix County AZ
In order to better visualize the Phoenix area, I will make a map, and add in a basemap using the Contextily package. This map will give us a good visual of the greater Phoenix area, and will better help us understand our biodiveristy analysis later.
# Set up the figure
= plt.subplots()
fig, ax
# Create the axis with plot
= phoenix.to_crs(epsg=3857)
phoenix_merc =ax, figsize=(11, 10), alpha=0.45, edgecolor="k", label="Phoenix Boundary")
phoenix_merc.plot(ax
# Add NatGeo basemap from contextily
=ax, source=ctx.providers.Esri.NatGeoWorldMap)
ctx.add_basemap(ax
# Add legend
= [Line2D([0], [0], color='k', alpha=0.45, lw=3, label='Phoenix County Boundary')]
legend_elements =legend_elements, loc="upper left", fontsize=12)
ax.legend(handles
# Update axes
"Phoenix County, Arizona", fontdict={"fontsize": 20})
ax.set_title(
ax.set_axis_off()
plt.show()
Biodiversity Intactness Index
Now that we have a good basemap, showing roads and ciites for context, we can work on also plotting our Plantary Computer data, to see biodiversity intactness in this highly urban area.
Remembering the data tables created above, I know that I am interested the “data” asset, which should contain the information that I need to continue my comparative analysis. Since my analysis will involve directly comparing the years 2017 and 2020, I will create variables defining the assets of those years specifically.
# Retrieve 2017 biodiversity data
= phx_2017.assets["data"]
phx_2017_asset = rioxr.open_rasterio(phx_2017_asset.href)
phx_2017_data
# Retrieve 2020 biodiversity data
= phx_2020.assets["data"]
phx_2020_asset = rioxr.open_rasterio(phx_2020_asset.href)
phx_2020_data
phx_2017_data phx_2020_data
<xarray.DataArray (band: 1, y: 7992, x: 7992)> Size: 255MB [63872064 values with dtype=float32] Coordinates: * band (band) int64 8B 1 * x (x) float64 64kB -115.4 -115.4 -115.4 ... -108.2 -108.2 -108.2 * y (y) float64 64kB 34.74 34.74 34.74 34.74 ... 27.57 27.57 27.57 spatial_ref int64 8B 0 Attributes: AREA_OR_POINT: Area scale_factor: 1.0 add_offset: 0.0
Since there is only 1 band, we can remove that dimension for a more easily managed dataset.
= phx_2017_data.squeeze().drop_vars('band')
phx_2017_data = phx_2020_data.squeeze().drop_vars('band') phx_2020_data
Now, I need to clip the Planetary Computer data to match the Phoenix polygon I created earlier. I will make sure that my Coordinate Reference Systems match, before clipping and plotting.
# Match CRSs
= phoenix.to_crs(phx_2017_data.rio.crs)
phoenix
# Assert check to ensure it worked
assert phoenix.crs == phx_2017_data.rio.crs
# Clip 2017 BII raster to the Phoenix polygon
= phx_2017_data.rio.clip(phoenix["geometry"])
phx_clip_2017
# And again for 2020
= phx_2020_data.rio.clip(phoenix["geometry"])
phx_clip_2020
# Let's see how that looks for 2017
phx_clip_2017.plot()
Calculating the percentage of area of the Phoenix subdivision with a BII of at least 0.75 in 2017 and 2020.
In order to do this calculation, I need to do a bit of “map algebra”.
I need to find the total number of pixels in the Phoenix subdivision polygon, for both years.
Then, I need to find the numbers of pixels with the BII at 0.75 or higher, again for both years.
Finally, I can use those pixel numbers I get to calculate the desired percentage.
# Calculate total area for 2017
= phx_clip_2017.count().item()
total_2017 # Calculate total area for 2020
= phx_clip_2020.count().item()
total_2020
# Calculate BII % for 2017, Make the data binary for easy math
= (phx_clip_2017 >= 0.75).astype(int)
phx_bii_2017 = phx_bii_2017.sum().item()
pixels_2017
# Calculate BII % for 2020
= (phx_clip_2020 >= 0.75).astype(int)
phx_bii_2020 = phx_bii_2020.sum().item()
pixels_2020
# Calcualte the percentage area for 2017 data:
= (pixels_2017 / total_2017) * 100
pct_bii_2017
# Calculate the percentage area for 2020 data:
= (pixels_2020 / total_2020) * 100
pct_bii_2020
# Print output
= "The percentage of area in Phoenix County with a BII over 0.75 in"
pct_text print(pct_text, "2017 is: ", round(pct_bii_2017, 2), "%")
print(pct_text, "2020 is: ", round(pct_bii_2020, 2), "%")
The percentage of area in Phoenix County with a BII over 0.75 in 2017 is: 7.13 %
The percentage of area in Phoenix County with a BII over 0.75 in 2020 is: 6.49 %
Visualization: Biodiversity Loss
Now that we have calculated the differences in biodiversity density between the two years, we can visualize this difference with a map.
# Find areas where there was a loss in BII from 2017 to 2020
= phx_bii_2017 - phx_bii_2020
phx_bii_diff
# Make a mask - like in 223!
= phx_bii_diff == 1
phx_mask
# Convert to int type
= phx_mask.astype(int)
phx_mask
# Make a color gradient
= ["none", "red"]
color_id
# Make a color map object
= plt.cm.colors.ListedColormap(color_id)
cmap
# Plot!
= cmap, add_colorbar = False) phx_mask.plot(cmap
Now that we have applied a color map visualization to our biodiversity loss, we can overlay this with our Planetary Computer raster, and our Phoenix subdivision polygon, to create a final visualization that shows us the area, in red, where Phoenix county experienced significant biodiversity losee.
Final Visualization:
# Initialize plot
= plt.subplots(figsize = (8, 7))
fig, ax
# Remove axes
"off")
ax.axis(
# Begin with clipped raster layer
= ax,
phx_clip_2020.plot(ax = {"location": "bottom",
cbar_kwargs "label": "BII in 2020"})
# Add mask layer
= ax,
phx_mask.plot(ax = cmap,
cmap = False)
add_colorbar = mpatches.Patch(color = "red",
phx_mask_patch = "Area with 75% Biodiversity Loss between 2017 and 2020")
label # Add polygon layer
= ax,
phoenix.plot(ax = "none",
color = "black",
edgecolor = 2)
linewidth
# update legend, add in BII loss patch
= [phx_mask_patch],
ax.legend(handles = False,
frameon = (0.85, -0.07))
bbox_to_anchor "Biodiversity Intactness Index (BII) and Biodiversity Loss in Phoenix, AZ")
ax.set_title(
plt.show()
Conclusion
Python and Jupyter Notebooks, as we can see, are powerful tools that allow us to conduct high level analyis with large amounts of spatial data.
In this analysis, we can use these tools to visualize the sharp decrease in biodiversity in this area, perhaps due to urbanization and human impact. Such an analysis can be used to further understand the overall biodiversity crisis, and can help inform decision makers about the needs and priorities of conservation efforts.
Citations and Data Access:
C. Galaz García, EDS 220 - Working with Environmental Datasets, Course Notes. 2024. [Online]. Available: https://meds-eds-220.github.io/MEDS-eds-220-course/book/preface.html
Microsoft Planetary Computer, STAC Catalog. Biodiversity Intactness (‘io-biodiversity’). [Dataset]. https://planetarycomputer.microsoft.com/dataset/io-biodiversity Accessed 2 December 2024.
United States Census Bureau. (2022). Arizona County Subdivision 2022 TIGER/Line Shapefiles. [Data File]. U.S. Census Bureau, Geography Division. https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2022&layergroup=County+Subdivisions Accessed 2 December 2024.
Citation
@online{jørgensen2024,
author = {Jørgensen, Bailey},
title = {Biodiversity {Intactness} {Index} {(BII)} Change in
{Phoenix,} {AZ}},
date = {2024-09-29},
url = {https://jorb1.github.io/posts/2024-09-19-BII/},
langid = {en}
}