You work for a nonprofit organization advising the planning department on ways to improve the quantity and quality of trees in New York City. The urban design team believes tree size (using trunk diameter as a proxy for size) and health are the most desirable characteristics of city trees.
The city would like to learn more about which tree species are the best choice to plant on the streets of Manhattan.
The team has provided access to the 2015 tree census and geographical information on New York City neighborhoods (trees, neighborhoods):
Tree census and neighborhood information from the City of New York NYC Open Data.
Create a report that covers the following:
import json

import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.io as pio
# Select the combined "vscode" and "notebook" Plotly renderers for fig.show().
pio.renderers.default = "vscode+notebook"
# Import the data: the tree census (CSV) and the neighborhood boundaries (shapefile).
trees = pd.read_csv('data/trees.csv')
neighborhoods = gpd.read_file('data/nta.shp')
# Dead trees have no spc_common recorded, so they are removed here.
# NOTE: dropna() without arguments removes every row that has a missing value
# in ANY column, not only rows missing spc_common.
trees = trees.dropna()
# Give the columns used in charts informative names. rename() matches whole
# column names, so it cannot rewrite another column that merely contains one
# of these strings (substring replacement on the column Index could).
trees = trees.rename(
    columns={
        "spc_common": "Name",
        "nta_name": "Neighborhood",
        "tree_dbh": "Tree Size",
    }
)
neighborhoods = neighborhoods.rename(columns={"ntaname": "Neighborhood"})
# Capitalise the first letter of every tree name (the rest is lower-cased) so labels look tidy.
trees['Name'] = trees['Name'].str.capitalize()
# Defaults shared by the charts and maps that follow.
px.defaults.color_continuous_scale = 'algae'
px.defaults.height = 670
# Map centre, starting zoom, base-map style and figure margins.
cp = {"lat": 40.79, "lon": -73.96}
init_zoom = 10.6
# NOTE: this name shadows the builtin map(); it is kept because the map cells refer to it.
map = "carto-positron"
mg = {"l": 20, "r": 20, "b": 20, "t": 100}
# Number of non-null species names, used as the denominator for percentages.
total_trees = trees['Name'].count()
# Show the ten most frequent species.
trees['Name'].value_counts().head(10)
Honeylocust 13175 Callery pear 7297 Ginkgo 5859 Pin oak 4584 Sophora 4453 London planetree 4122 Japanese zelkova 3596 Littleleaf linden 3333 American elm 1698 American linden 1583 Name: Name, dtype: int64
# Count the trees of each species and keep the ten largest groups for the bar chart.
species_counts = trees.groupby(['Name'])['Name'].agg(['count'])
top_species = species_counts.sort_values(by='count', ascending=False).head(10)
fig = px.bar(
    top_species,
    y='count',
    color='count',
    text_auto=True,
    height=500,
    title='Top 10 tree species in Manhattan',
)
fig.update_layout(xaxis_title='', yaxis_title='Number of trees', showlegend=False)
# The colour only reinforces bar height, so the colour scale itself is hidden.
fig.update_coloraxes(
    colorbar_ticklabelposition='inside',
    colorbar_ticks='inside',
    showscale=False,
)
fig.update_traces(
    hovertemplate='%{x}, %{y} trees',
    marker_line_color='darkgreen',
    marker_line_width=1.0,
)
fig.show()
A. The most common tree species in Manhattan is the Honeylocust, with 13,175 trees.
# Build one table per neighborhood holding tree count, density, share of all
# trees and the boundary geometry needed for plotting.
# Areas are measured in a metre-based projection so they can be converted to km².
nhood_m = neighborhoods.to_crs(epsg=6933)
areas = nhood_m.area
neighborhoods["area_sqKm"] = areas.values / 1e6  # m² -> km²
# Trees per neighborhood code, joined onto the boundaries by that code.
grp_trees = trees.groupby('nta')['Neighborhood'].agg(['count'])
trees_geo = neighborhoods.merge(grp_trees, left_on=['ntacode'], right_on=['nta'])
# Whole trees per km² (floor division drops the fractional part).
trees_geo['Density'] = trees_geo['count'] // trees_geo['area_sqKm']
# Share of all trees as a percentage, truncated to two decimal places.
hundredths_of_percent = (trees_geo['count'] * 10000) // total_trees
trees_geo['Tree %'] = hundredths_of_percent / 100
# Sort ascending so that tail(10) yields the ten neighborhoods with the most trees.
count_by_nhood = trees_geo.sort_values(by='count', ascending=True)
df_plot = count_by_nhood[['Neighborhood', 'count']].set_index('Neighborhood')
fig = px.bar(
    df_plot.tail(10),
    x='count',
    color='count',
    range_x=(500, 6000),
    text_auto=True,
    height=500,
    title='Top 10 leafy neighborhoods by number of trees',
)
fig.update_layout(xaxis_title='Trees', yaxis_title='')
fig.update(layout_coloraxis_showscale=False)
fig.update_traces(
    hovertemplate='%{x} trees',
    marker_line_color='darkgreen',
    marker_line_width=1.0,
)
fig.show()
# Choropleth map of the number of trees in each neighborhood.
df_plot = trees_geo.set_index('Neighborhood')
# Parse the GeoJSON text with json.loads instead of eval(): eval() raises
# NameError on JSON literals such as null/true/false and would execute
# whatever expression the string happens to contain.
nhood_geojson = json.loads(df_plot['geometry'].to_json())
fig = px.choropleth_mapbox(
    df_plot,
    geojson=nhood_geojson,
    locations=df_plot.index,
    center=cp,
    mapbox_style=map,
    zoom=init_zoom,
    color='count',
    opacity=0.8,
    title='Which neighborhood has the most trees?',
)
fig.update_coloraxes(colorbar_title='Trees')
fig.update_traces(hovertemplate='%{location}, %{z} trees')
fig.update_layout(margin=mg)
fig.show()
# Sort ascending so that tail(10) yields the ten densest neighborhoods.
density_by_nhood = trees_geo.sort_values(by='Density', ascending=True)
df_plot = density_by_nhood[['Neighborhood', 'Density']].set_index('Neighborhood')
fig = px.bar(
    df_plot.tail(10),
    x='Density',
    color='Density',
    text_auto=True,
    height=500,
    title='Top 10 leafy neighborhoods by density of trees',
)
fig.update_layout(xaxis_title='Trees per km²', yaxis_title='')
fig.update(layout_coloraxis_showscale=False)
fig.update_traces(
    hovertemplate='%{x} trees per km²',
    marker_line_color='darkgreen',
    marker_line_width=1.0,
)
fig.show()
# Choropleth map of tree density (trees per km²) in each neighborhood.
df_plot = trees_geo.set_index('Neighborhood')
# Parse the GeoJSON text with json.loads instead of eval(): eval() raises
# NameError on JSON literals such as null/true/false and would execute
# whatever expression the string happens to contain.
nhood_geojson = json.loads(df_plot['geometry'].to_json())
fig = px.choropleth_mapbox(
    df_plot,
    geojson=nhood_geojson,
    locations=df_plot.index,
    center=cp,
    mapbox_style=map,
    zoom=init_zoom,
    color='Density',
    opacity=0.8,
    title='Which neighborhood has the highest density of trees?',
)
fig.update_coloraxes(colorbar_title='Trees per km²')
fig.update_traces(hovertemplate='%{location}, %{z} trees per km²')
fig.update_layout(margin=mg)
fig.show()
Top 5 neighborhoods by share of Manhattan's trees (%)
# The five neighborhoods holding the largest percentage of all trees.
tree_share = trees_geo[['Neighborhood', 'Tree %']]
tree_share.sort_values(by='Tree %', ascending=False).set_index('Neighborhood').head(5)
| Neighborhood | Tree % |
|---|---|
| Upper West Side | 9.16 |
| Upper East Side-Carnegie Hill | 7.27 |
| West Village | 5.95 |
| Central Harlem North-Polo Grounds | 5.37 |
| Hudson Yards-Chelsea-Flatiron-Union Square | 4.48 |
A. The Upper West Side has the greatest number of trees: 5,723, which is 9.16% of all trees in Manhattan.
However, two neighborhoods have a higher tree density: Upper East Side-Carnegie Hill and Central Harlem South.
# Hexagonal-bin map of individual tree locations across Manhattan.
# NOTE(review): no `color` column is supplied, so agg_func presumably has no
# effect on the result - confirm against the Plotly documentation.
fig = ff.create_hexbin_mapbox(
    data_frame=trees,
    lat="latitude",
    lon="longitude",
    nx_hexagon=40,
    min_count=1,
    agg_func=np.sum,
    labels={"color": "Trees"},
    color_continuous_scale='algae',
    opacity=0.7,
    center=cp,
    zoom=init_zoom,
    mapbox_style=map,
    title='Where are all the trees in Manhattan?',
    show_original_data=False,
    original_data_marker={"size": 1, "opacity": 0.5, "color": "yellowgreen"},
)
fig.update_coloraxes(colorbar_ticklabelposition='inside')
fig.update_layout(margin=mg)
fig.show()
The urban design team believes tree size (using trunk diameter as a proxy for size) and health are the most desirable characteristics of city trees.
# Median trunk size per species; the median is used so outliers have little influence.
tree_size = (
    trees.groupby(['Name'])['Tree Size']
    .agg(['median'])
    .sort_values(by='median', ascending=False)
    .reset_index()
)
tree_size.columns = ['Name', 'Median Size']
# Trees per species in each health category, one column per category
# (species/category combinations with no trees become 0).
tree_health = trees.groupby(['Name', 'health'])['Name'].agg('count').unstack().fillna(0)
tree_health['Total'] = tree_health.sum(axis='columns')
# Whole-number percentage of each health category (floor division truncates).
for health_label in ('Good', 'Fair', 'Poor'):
    tree_health[f'{health_label} %'] = (tree_health[health_label] * 100) // tree_health['Total']
tree_health = tree_health.reset_index()
# Combine size and health into a single table keyed by species name.
tree_size_health = pd.merge(tree_size, tree_health, on='Name')
# Skip species represented by a single tree.
has_multiple_trees = tree_size_health['Total'] >= 2
selection_columns = ['Name', 'Median Size', 'Good %', 'Fair %', 'Poor %', 'Total']
tree_selection = tree_size_health.loc[has_multiple_trees, selection_columns]
# Largest median size first; ties are ordered by the higher share in good health.
tree_selection = tree_selection.sort_values(by=['Median Size', 'Good %'], ascending=[False, False])
# Scatter of every retained species: median trunk size against the percentage
# of its trees in good health. (Title typo "Trees species" corrected.)
fig = px.scatter(
    tree_selection,
    x='Median Size',
    y='Good %',
    title='Tree species by health and trunk size',
    hover_name="Name",
    hover_data=["Median Size", "Good %", 'Fair %', 'Poor %', 'Total'],
)
fig.update_layout(yaxis_title='Good health %', xaxis_title='Median trunk size (inches)')
# Label and shade the region of the chart holding the recommended species.
fig.add_annotation(x=11, y=98, text="Tree Species Recommendations", showarrow=False, yshift=10)
fig.update_traces(marker_size=8, marker_color='darkgreen')
fig.add_shape(type="rect", x0=7.8, y0=75, x1=14.5, y1=102, fillcolor="green", opacity=0.3)
fig.show()
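A. The following 10 tree species are recommended for future planting in Manhattan: they have the largest median trunk sizes among species with at least 75% of their trees in good health.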
# Keep species with at least 75% of trees in good health, indexed by name.
is_healthy = tree_selection['Good %'] >= 75
tree_selection = tree_selection.loc[is_healthy].set_index('Name')
# Swap the working column names for report-friendly headings.
tree_selection = tree_selection.rename(
    columns={
        'Median Size': 'Median trunk size',
        'Good %': 'Good health %',
        'Fair %': 'Fair health %',
        'Poor %': 'Poor health %',
        'Total': 'Current population',
    }
)
# The table is already ordered by size then health, so the first ten rows are the recommendations.
tree_selection.head(10)
| Name | Median trunk size | Good health % | Fair health % | Poor health % | Current population |
|---|---|---|---|---|---|
| American elm | 12.0 | 80.0 | 15.0 | 4.0 | 1698.0 |
| Black maple | 11.0 | 90.0 | 10.0 | 0.0 | 10.0 |
| Amur cork tree | 11.0 | 87.0 | 12.0 | 0.0 | 8.0 |
| Siberian elm | 11.0 | 83.0 | 11.0 | 4.0 | 156.0 |
| Tree of heaven | 11.0 | 78.0 | 16.0 | 4.0 | 104.0 |
| Willow oak | 10.0 | 84.0 | 12.0 | 3.0 | 889.0 |
| Black locust | 10.0 | 77.0 | 20.0 | 2.0 | 259.0 |
| White ash | 9.5 | 80.0 | 16.0 | 4.0 | 50.0 |
| Honeylocust | 9.0 | 83.0 | 15.0 | 1.0 | 13175.0 |
| Pin oak | 9.0 | 81.0 | 15.0 | 2.0 | 4584.0 |