import json

import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.io as pio
# Select the combined "vscode+notebook" plotly renderer for fig.show().
pio.renderers.default = "vscode+notebook"

# Load the neighborhood shapes and the tree records.
neighborhoods = gpd.read_file('data/nta.shp')

# The original note says dead trees have no spc_common and should be dropped.
# dropna() without arguments removes every row that has a missing value in
# ANY column, not only spc_common.
trees = pd.read_csv('data/trees.csv').dropna()


# Give the columns that appear in charts human-readable labels.
# rename() matches whole column labels only, so a column whose name merely
# contains one of these strings is left alone (a substring replace over all
# column labels would rewrite it too).
trees = trees.rename(
    columns={
        "spc_common": "Name",
        "nta_name": "Neighborhood",
        "tree_dbh": "Tree Size",
    }
)
neighborhoods = neighborhoods.rename(columns={"ntaname": "Neighborhood"})

# Capitalise all tree names, so they look nice:
# first character upper-case, the rest lower-case.
trees['Name'] = trees['Name'].str.capitalize()

# Defaults applied to every plotly express figure created below.
px.defaults.height = 670
px.defaults.color_continuous_scale = 'algae'

# Shared map settings: view centre, starting zoom level and base-map style.
cp = {"lat": 40.79, "lon": -73.96}
init_zoom = 10.6
# NOTE(review): `map` shadows the built-in map(); the name is kept because
# later cells pass it as mapbox_style.
map = "carto-positron"

# Figure margins (left, right, bottom, top) used by the map figures.
mg = {"l": 20, "r": 20, "b": 20, "t": 100}

# Number of non-null species names; used as the denominator for 'Tree %'.
total_trees = trees['Name'].count()


# Ten most frequent species names; value_counts() sorts by frequency, highest first.
species_frequency = trees['Name'].value_counts()
species_frequency.head(10)

Honeylocust          13175
Callery pear          7297
Ginkgo                5859
Pin oak               4584
Sophora               4453
London planetree      4122
Japanese zelkova      3596
Littleleaf linden     3333
American elm          1698
American linden       1583
Name: Name, dtype: int64


# Bar chart of the ten most common species.
# Count the rows per species, then keep the ten largest groups.
species_counts = trees.groupby(['Name'])['Name'].agg(['count'])
top_species = species_counts.sort_values(by='count', ascending=False).head(10)

fig = px.bar(
    top_species,
    y='count',
    color='count',
    text_auto=True,
    height=500,
    title='Top 10 tree species in Manhattan',
)
fig.update_layout(xaxis_title='', yaxis_title='Number of trees', showlegend=False)
# The colour scale is hidden; the bar labels already show the counts.
fig.update_coloraxes(
    showscale=False,
    colorbar_ticks='inside',
    colorbar_ticklabelposition='inside',
)
fig.update_traces(
    marker_line_width=1.0,
    marker_line_color='darkgreen',
    hovertemplate='%{x}, %{y} trees',
)
fig.show()


# Build trees_geo: one row per neighborhood with its geometry, tree count,
# tree density and share of all trees, ready for plotting.

# Reproject to EPSG:6933 so that areas come out in square metres
# (per the original note), then convert m² to km².
nhood_m = neighborhoods.to_crs(epsg=6933)
areas = nhood_m.area
neighborhoods["area_sqKm"] = areas.values / 1e6

# Trees per neighborhood code, joined onto the neighborhood shapes.
grp_trees = trees.groupby('nta')['Neighborhood'].agg(['count'])
trees_geo = neighborhoods.merge(
    grp_trees,
    left_on=['ntacode'],
    right_on=['nta'],
)

# Floor division keeps Density as whole trees per km².
trees_geo['Density'] = trees_geo['count'] // trees_geo['area_sqKm']
# Share of all trees in percent, truncated (not rounded) to two decimals.
trees_geo['Tree %'] = ((trees_geo['count'] * 10000) // total_trees) / 100


# Horizontal bar chart of the ten neighborhoods with the most trees.
# Sorting ascending and taking the tail keeps the ten largest counts,
# in ascending order.
ranked_by_count = trees_geo.sort_values(by='count', ascending=True)
df_plot = ranked_by_count[['Neighborhood', 'count']].set_index('Neighborhood')

fig = px.bar(
    df_plot.tail(10),
    x='count',
    color='count',
    range_x=(500, 6000),
    text_auto=True,
    height=500,
    title='Top 10 leafy neighborhoods by number of trees',
)
# Hide the colour scale along with setting the axis titles.
fig.update_layout(xaxis_title='Trees', yaxis_title='', coloraxis_showscale=False)
fig.update_traces(
    marker_line_width=1.0,
    marker_line_color='darkgreen',
    hovertemplate='%{x} trees',
)
fig.show()


# Choropleth of tree counts per neighborhood.
# Indexing by neighborhood name makes the names the `locations` values
# passed to plotly.
df_plot = trees_geo.set_index('Neighborhood')

# Parse the GeoJSON text with json.loads rather than eval(): eval executes
# the text as Python code and fails on JSON literals such as null/true/false.
neighborhood_geojson = json.loads(df_plot['geometry'].to_json())

fig = px.choropleth_mapbox(
    df_plot,
    geojson=neighborhood_geojson,
    locations=df_plot.index,
    color='count',
    center=cp,
    zoom=init_zoom,
    mapbox_style=map,
    opacity=0.8,
    title='Which neighborhood has the most trees?',
)
fig.update_coloraxes(colorbar_title='Trees')
fig.update_traces(hovertemplate='%{location}, %{z} trees')
fig.update_layout(margin=mg)
fig.show()


# Horizontal bar chart of the ten neighborhoods with the highest tree density.
# Sorting ascending and taking the tail keeps the ten largest densities,
# in ascending order.
ranked_by_density = trees_geo.sort_values(by='Density', ascending=True)
df_plot = ranked_by_density[['Neighborhood', 'Density']].set_index('Neighborhood')

fig = px.bar(
    df_plot.tail(10),
    x='Density',
    color='Density',
    text_auto=True,
    height=500,
    title='Top 10 leafy neighborhoods by density of trees',
)
# Hide the colour scale along with setting the axis titles.
fig.update_layout(xaxis_title='Trees per km²', yaxis_title='', coloraxis_showscale=False)
fig.update_traces(
    marker_line_width=1.0,
    marker_line_color='darkgreen',
    hovertemplate='%{x} trees per km²',
)
fig.show()


# Choropleth of tree density (trees per km²) per neighborhood.
# Indexing by neighborhood name makes the names the `locations` values
# passed to plotly.
df_plot = trees_geo.set_index('Neighborhood')

# Parse the GeoJSON text with json.loads rather than eval(): eval executes
# the text as Python code and fails on JSON literals such as null/true/false.
neighborhood_geojson = json.loads(df_plot['geometry'].to_json())

fig = px.choropleth_mapbox(
    df_plot,
    geojson=neighborhood_geojson,
    locations=df_plot.index,
    color='Density',
    center=cp,
    zoom=init_zoom,
    mapbox_style=map,
    opacity=0.8,
    title='Which neighborhood has the highest density of trees?',
)
fig.update_coloraxes(colorbar_title='Trees per km²')
fig.update_traces(hovertemplate='%{location}, %{z} trees per km²')
fig.update_layout(margin=mg)
fig.show()


# Five neighborhoods holding the largest percentage of all trees.
tree_share = trees_geo[['Neighborhood', 'Tree %']]
tree_share.sort_values(by='Tree %', ascending=False).set_index('Neighborhood').head(5)


# Hexagonal-bin map of every tree location.
# No `color` column is passed; presumably each hexagon then shows the number
# of points it contains (labelled "Trees") - confirm against the plotly docs.
hexbin_options = dict(
    data_frame=trees,
    lat="latitude",
    lon="longitude",
    nx_hexagon=40,
    min_count=1,
    agg_func=np.sum,
    labels={"color": "Trees"},
    color_continuous_scale='algae',
    opacity=0.7,
    center=cp,
    zoom=init_zoom,
    mapbox_style=map,
    title='Where are all the trees in Manhattan?',
    # The individual points are not drawn; the marker style is kept for when
    # show_original_data is switched on.
    show_original_data=False,
    original_data_marker=dict(size=1, opacity=0.5, color="yellowgreen"),
)
fig = ff.create_hexbin_mapbox(**hexbin_options)
fig.update_coloraxes(colorbar_ticklabelposition='inside')
fig.update_layout(margin=mg)
fig.show()


# Median trunk size per species, largest first. The median is used rather
# than the mean to avoid the influence of outliers.
tree_size = (
    trees.groupby('Name')['Tree Size']
    .median()
    .sort_values(ascending=False)
    .rename('Median Size')
    .reset_index()
)


# Health breakdown per species: one row per species, one column per health
# value, with 0 where a species has no trees at that health level.
tree_health = (
    trees.groupby(['Name', 'health'])['Name']
    .agg('count')
    .unstack()
    .fillna(0)
)
tree_health['Total'] = tree_health.sum(axis='columns')

# Whole-number percentage of each health grade (floor division truncates).
for grade in ('Good', 'Fair', 'Poor'):
    tree_health[f'{grade} %'] = (tree_health[grade] * 100) // tree_health['Total']

tree_health = tree_health.reset_index()


# Combine median size and health figures into one row per species
# (default inner join on the shared 'Name' column).
tree_size_health = tree_size.merge(tree_health, on='Name')


# Scatter plot of species: median trunk size against share of trees in good health.

# Skip lone trees: keep only species with at least two trees.
has_multiple_trees = tree_size_health['Total'] >= 2
report_columns = ['Name', 'Median Size', 'Good %', 'Fair %', 'Poor %', 'Total']
tree_selection = tree_size_health.loc[has_multiple_trees, report_columns]

# Largest and healthiest species first.
tree_selection = tree_selection.sort_values(
    by=['Median Size', 'Good %'],
    ascending=[False, False],
)

fig = px.scatter(
    tree_selection,
    x='Median Size',
    y='Good %',
    # Title typo fixed: was "Trees species ...".
    title='Tree species by health and trunk size',
    hover_name="Name",
    hover_data=["Median Size", "Good %", 'Fair %', 'Poor %', 'Total'],
)
fig.update_layout(yaxis_title='Good health %', xaxis_title='Median trunk size (inches)')
fig.update_traces(marker_size=8, marker_color='darkgreen')

# Shade and label the hard-coded region of large, healthy species.
fig.add_annotation(x=11, y=98, text="Tree Species Recommendations", showarrow=False, yshift=10)
fig.add_shape(type="rect", x0=7.8, y0=75, x1=14.5, y1=102, fillcolor="green", opacity=0.3)
fig.show()


# Recommendation table: species with at least 75% of trees in good health.
# tree_selection is already ordered, so the first ten rows are shown as-is.
mostly_healthy = tree_selection['Good %'] >= 75
tree_selection = tree_selection.loc[mostly_healthy].set_index('Name')

# Presentation labels, assigned by position to the five remaining columns.
tree_selection.columns = [
    'Median trunk size',
    'Good health %',
    'Fair health %',
    'Poor health %',
    'Current population',
]
tree_selection.head(10)

	Tree %
Neighborhood
Upper West Side	9.16
Upper East Side-Carnegie Hill	7.27
West Village	5.95
Central Harlem North-Polo Grounds	5.37
Hudson Yards-Chelsea-Flatiron-Union Square	4.48

	Median trunk size	Good health %	Fair health %	Poor health %	Current population
Name
American elm	12.0	80.0	15.0	4.0	1698.0
Black maple	11.0	90.0	10.0	0.0	10.0
Amur cork tree	11.0	87.0	12.0	0.0	8.0
Siberian elm	11.0	83.0	11.0	4.0	156.0
Tree of heaven	11.0	78.0	16.0	4.0	104.0
Willow oak	10.0	84.0	12.0	3.0	889.0
Black locust	10.0	77.0	20.0	2.0	259.0
White ash	9.5	80.0	16.0	4.0	50.0
Honeylocust	9.0	83.0	15.0	1.0	13175.0
Pin oak	9.0	81.0	15.0	2.0	4584.0

Which Tree Species Should The City Plant?¶

📖 Background¶

💾 The data¶

💪 Challenge¶

Q. What are the most common tree species in Manhattan?¶

Q. Which are the neighborhoods with the most trees?¶

Visualization of Manhattan's tree locations.¶

Which Tree Species Should The City Plant?¶

📖 Background¶

💾 The data¶

💪 Challenge¶

Q. What are the most common tree species in Manhattan?¶

Q. Which are the neighborhoods with the most trees?¶

Visualization of Manhattan's tree locations.¶

Q. What ten tree species would you recommend the city plant in the future?¶