You work for a nonprofit organization advising the planning department on ways to improve the quantity and quality of trees in New York City. The urban design team believes tree size (using trunk diameter as a proxy for size) and health are the most desirable characteristics of city trees.
The city would like to learn more about which tree species are the best choice to plant on the streets of Manhattan.
The team has provided access to two datasets, `trees` and `neighborhoods`: the 2015 tree census and geographical information on New York City neighborhoods.
Source: tree census and neighborhood information from NYC Open Data, published by the City of New York.
Create a report that covers the following:
import pandas as pd
import geopandas as gpd
import numpy as np
import plotly.express as px
import plotly.io as pio
import plotly.figure_factory as ff
import plotly.graph_objects as go
# Use the combined "vscode" and "notebook" Plotly renderers for fig.show().
pio.renderers.default = "vscode+notebook"

# Import the data: the tree census (CSV) and the neighborhood shapefile.
trees = pd.read_csv('data/trees.csv')
neighborhoods = gpd.read_file('data/nta.shp')

# There are rows of dead trees missing spc_common; drop exactly those rows.
# A bare dropna() would also discard any tree that has a species but a gap
# in some unrelated column, so the drop is restricted to spc_common.
trees = trees.dropna(subset=['spc_common'])
# Change column names to informative descriptions for charts.
# rename() matches whole column labels only, whereas a string replace over
# the column index would also rewrite any label that merely contains one of
# these names as a substring.
trees = trees.rename(
    columns={
        "spc_common": "Name",
        "nta_name": "Neighborhood",
        "tree_dbh": "Tree Size",
    }
)
neighborhoods = neighborhoods.rename(columns={"ntaname": "Neighborhood"})

# Capitalise all tree names (first character upper-case, the rest lower-case)
# so they look nice in chart labels.
trees['Name'] = trees['Name'].str.capitalize()
# Plotly Express defaults applied to charts that do not override them.
px.defaults.height = 670
px.defaults.color_continuous_scale = 'algae'

# Presumably shared map settings (centre point, initial zoom, base-map
# style) -- their use is not visible in this part of the file.
init_zoom = 10.6
cp = {'lat': 40.79, 'lon': -73.96}
# NOTE(review): `map` shadows the built-in map(); the name is kept because it
# is a module-level name that other code in the file may reference.
map = "carto-positron"

# Presumably figure margins (left, right, bottom, top) -- confirm at use site.
mg = {'l': 20, 'r': 20, 'b': 20, 't': 100}
# Number of rows with a non-missing species name (count() skips NaN).
total_trees = trees["Name"].count()
# Notebook display: the ten most frequent species, most common first.
trees["Name"].value_counts().iloc[:10]
Honeylocust          13175
Callery pear          7297
Ginkgo                5859
Pin oak               4584
Sophora               4453
London planetree      4122
Japanese zelkova      3596
Littleleaf linden     3333
American elm          1698
American linden       1583
Name: Name, dtype: int64
# Count trees per species and keep the ten largest counts, biggest first.
# The result is indexed by species name with a single 'count' column.
top_species_counts = (
    trees.groupby(['Name'])['Name']
    .agg(['count'])
    .sort_values(by='count', ascending=False)
    .head(10)
)

# Bar chart of the ten species; no x is given, so the species-name index
# supplies the categories, and bars are coloured and labelled by count.
fig = px.bar(
    top_species_counts,
    y='count',
    color='count',
    text_auto=True,
    height=500,
    title='Top 10 tree species in Manhattan',
)
fig.update_layout(
    xaxis_title='',
    yaxis_title='Number of trees',
    showlegend=False,
)
# The colour scale is hidden; the remaining colour-bar options are kept as-is.
fig.update_coloraxes(
    showscale=False,
    colorbar_ticks='inside',
    colorbar_ticklabelposition='inside',
)
fig.update_traces(
    marker_line_width=1.0,
    marker_line_color='darkgreen',
    hovertemplate='%{x}, %{y} trees',
)
fig.show()