import pymongo
import datetime
import collections
from numpy import nan as NA
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
#import seaborn as sns
import folium
import numpy as np
import pandas as pd
import scipy.stats
import psycopg2 as pg
import pandas as pd
import pandas.io.sql as psqlg
import matplotlib.patches as mpatches
from itertools import cycle, islice
Questions
Can I classify records with the generic species ‘bat’ into a specific species based on location data? This could be one question; the other could be a comparison of the numbers of hibernating and roosting bats, to ascertain whether they follow similar patterns and whether some types of bats survive hibernation better than others.
Question 1 Classification was carried out using the leave-one-out algorithm in 'LeaveOneOut' notebook and the K Nearest Neighbours classifier in the KNN notebook. The value of k which produced the best results was 1 using the leave-one-out algorithm, but this is not thought to be reliable and as such the value of 3 was selected based on manual calculation of optimum euclidean distance in the KNN notebook.
08/08/2016 Question 1: have the bat numbers increased by a healthy percentage over time? Question 2: have the numbers of the rare bat increased or decreased? Have the changes made by combining the datasets where commonname = 'bat' and re-classifying them made a difference to the results?
At present we are able to produce Roost Count trends for common pipistrelle, soprano pipistrelle, serotine, Natterer's bat, brown long-eared bat, lesser horseshoe bat and greater horseshoe bat
# Priority species reported in the visualisations of the report
# "The state of the UK's bats 2014, National Bat Monitoring Programme
# Population Trends" (The Bat Conservation Trust, 2014).
PrioritySpecies = [
    'Lesser Horseshoe Bat',
    'Greater Horseshoe Bat',
    'Soprano Pipistrelle',
    'Brown Long-eared Bat',
]
# Additional species reported in the visualisations of the report
# "The state of the UK's bats 2014, National Bat Monitoring Programme
# Population Trends" (The Bat Conservation Trust, 2014).
# The names previously carried literal quote characters inside the strings
# (e.g. "'Natterer's Bat'") and mixed casing, so they could never equal the
# commonname values compared against elsewhere in this notebook
# (e.g. "Natterer's Bat", "Daubenton's Bat", "Whiskered/Brandt's Bat").
OtherSpecies = [
    'Common Pipistrelle',
    'Serotine',
    "Natterer's Bat",
    "Daubenton's Bat",
    'Noctule',
    "Brandt's Bat",
    "Whiskered/Brandt's Bat",
]
Greater horseshoe bat* (Roost Count) significant increase
Lesser horseshoe bat* (Hibernation Survey & Roost Count) We conclude that the lesser horseshoe bat population is increasing
Daubenton’s bat (Hibernation Survey) Daubenton's bat is showing a small but significant increase
Common pipistrelle (Field Survey) We conclude that the common pipistrelle population is increasing from the upward trend seen in this species in the Field Survey
whiskered/Brandt’s bat, soprano pipistrelle, noctule, serotine and brown long-eared bat*; none of these showed significant changes over the monitoring period to 2013.
At present, there are insufficient data available for the other six UK breeding bat species (Bechstein’s bat, Alcathoe bat, Leisler’s bat, Nathusius’ pipistrelle, barbastelle and grey long-eared bat) to allow estimation of population trends
Grey long-eared bat surveillance We are always striving to improve our knowledge of the distribution of bat species, particularly where information is lacking or patchy. The grey longeared bat is one of the UK’s rarest bat species - 1,000 individuals and a distribution that is restricted mainly to the southern coast of England and Wales.
Reading in the cleaned and classified data. For details of the cleaning process please see R0050097_project_diary.
#Connecting to the database
%load_ext sql
%sql postgresql://test:test@localhost:5432/tm351test
Connecting to the database
# psycopg2 connection used by the pd.read_sql_query calls in this notebook.
conn = pg.connect(
    dbname='tm351test',
    user='test',
    password='test',
    host='localhost',
    port=5432,
)
Checking the tables in the tm351test schema.
%%sql
SELECT * FROM information_schema.tables
#Reading in the data from the tables in the postgreSQL database initialised in the notebook PostrgreSQL_DB
I will select a period of one year initially
# Classified hibernation records: rows whose commonname was 'Bat' have been
# re-labelled by the nearest-neighbour algorithm and the erroneous grid
# references removed, so this is the cleaned data to work with.
Hibernation_data = pd.read_sql_query("SELECT * FROM ClassifiedHibernationBats", conn)
Hibernation_data.head()
len(Hibernation_data)
# Sanity check: no generic 'Bat' commonname should remain.
_generic_hib_rows = Hibernation_data['commonname'] == 'Bat'
batlist = Hibernation_data.loc[_generic_hib_rows]
batlist
# Number of records for each commonname.
cleaned = Hibernation_data['commonname'].value_counts()
cleaned
Comparing the unclassified data - Hibuncleaned_data with the k-nn classified data - Hibernation_data to ensure integrity has been maintained.
Comparison of cleaned and uncleaned hibernation data
# Unclassified hibernation records read from the database.
Hibuncleaned_data = pd.read_sql_query("SELECT * FROM HibernationBats", conn)
len(Hibuncleaned_data)
# Rows that still carry the generic 'Bat' commonname.
batlist1 = Hibuncleaned_data.loc[Hibuncleaned_data['commonname'].eq('Bat')]
batlist1.head()
# Number of records for each commonname.
uncleaned = Hibuncleaned_data['commonname'].value_counts()
uncleaned
# Compare per-species record counts before (uncleaned) and after (cleaned)
# classification of the generic 'Bat' records.
#
# Both series come from value_counts(), so each is sorted by its own
# frequencies. The line and the bars share one x-axis by position, so the
# cleaned counts are re-ordered to the species order of the uncleaned counts
# before plotting; otherwise a point can sit above a different species' bar.
# A species with no cleaned records (expected for 'Bat') is plotted as 0.
cleaned_aligned = cleaned.reindex(uncleaned.index, fill_value=0)

fig = plt.figure(figsize=(15, 10))
# Single subplot (1 by 1 grid) carrying the cleaned counts as a line.
cleanedAxes = fig.add_subplot(111)
cleaned_aligned.plot.line(ax=cleanedAxes, ylim=[0, 7000], color='red', fontsize=15)
cleanedAxes.set_ylabel('Counts', fontsize=20)
# Second y-axis sharing the same x-axis, carrying the uncleaned counts as bars.
uncleanedAxes = cleanedAxes.twinx()
uncleaned.plot(kind='bar', ax=uncleanedAxes, ylim=[0, 7000], color='blue', fontsize=15)
uncleanedAxes.set_ylabel('Uncleaned Counts', fontsize=20)
# mpatches.Patch() creates proxy entries that can be passed to the handles
# parameter of plt.legend().
legendpatch_list = [
    mpatches.Patch(color='red', label='Cleaned Counts'),
    mpatches.Patch(color='blue', label='Uncleaned Counts'),
]
# loc is given as an (x, y) position in axes-fraction coordinates; x > 1
# places the legend to the right of the plot area.
plt.legend(handles=legendpatch_list, loc=(1.1, 0.7), fontsize=20)
plt.title('Hibernation Uncleaned and cleaned counts compared', fontsize=20)
This looks very similar and the pattern is the same, which is reassuring. Classifying the 'Bat' species has not markedly changed the shape of the data.
Selecting one year of data to compare to one year of Roost data
Between 1999, when trends from standardised large-scale monitoring became available through the National Bat Monitoring Programme (NBMP), and 2013, bat populations have increased by 23 per cent; an assessment of the underlying smoothed trend shows this is a statistically significant increase. (jncc.defra, 2014) C8. Mammals of the wider countryside (bats)
# month and year arrive with object dtype; cast both to int.
for _col in ('month', 'year'):
    Hibernation_data[_col] = Hibernation_data[_col].astype(int)
Hibernation_data.dtypes
# Keep the study months (January, February) and the years from 1999, when
# better counting began, up to 2013; 2014 is a part year and is excluded.
_hib_study_months = Hibernation_data['month'].isin([1, 2])
_hib_study_years = Hibernation_data['year'].between(1999, 2013)
SelectYears_df = Hibernation_data[_hib_study_months & _hib_study_years]
SelectYears_df.tail(10)
len(SelectYears_df)
SelectYears_df['commonname'].value_counts()
# Reshape to one row per year and one column per commonname for plotting.
_hib_per_year = SelectYears_df.groupby('year').apply(lambda x: x['commonname'].value_counts())
grp = _hib_per_year.unstack().fillna(0)
grp.unstack()
grp.plot(figsize=(15, 8))
# Classified roost records: rows whose commonname was 'Bat' have been
# re-labelled by the nearest-neighbour algorithm and the erroneous grid
# references removed, so this is the cleaned data to work with.
Roost_data = pd.read_sql_query("SELECT * FROM ClassifiedRoostBats", conn)
Roost_data.head()
# Earliest and latest year present in the roost data.
syear, eyear = Roost_data['year'].min(), Roost_data['year'].max()
print(syear, eyear)
# Unclassified roost records from the database.
Uncleaned_Roost_data = pd.read_sql_query("SELECT * FROM RoostBats", conn)
Uncleaned_Roost_data.head()
Comparison of classified and unclassified data.
# Records per commonname in the classified roost data.
cleanedRoost = Roost_data['commonname'].value_counts()
# Records per commonname in the unclassified roost data.
uncleanedRoost = Uncleaned_Roost_data['commonname'].value_counts()
Comparing the cleaned and uncleaned datasets.
# Compare per-species roost record counts before (uncleaned) and after
# (cleaned) classification of the generic 'Bat' records.
#
# Both series come from value_counts(), so each is sorted by its own
# frequencies. Re-order the cleaned counts to the species order of the
# uncleaned counts so the line and the bars line up species by species;
# a species with no cleaned records is plotted as 0.
roost_cleaned_aligned = cleanedRoost.reindex(uncleanedRoost.index, fill_value=0)

fig = plt.figure(figsize=(15, 12))
# Single subplot (1 by 1 grid) carrying the cleaned counts as a line.
cleanAxes = fig.add_subplot(111)
roost_cleaned_aligned.plot.line(ax=cleanAxes, ylim=[0, 7000], color='red', fontsize=15)
# Axis labels are set on the specific axes objects. Going through
# plt.xlabel/plt.ylabel after twinx() targets the twin axes, which replaced
# the 'Uncleaned Counts' label with 'Counts'.
cleanAxes.set_ylabel('Counts', fontsize=20)
cleanAxes.set_xlabel('Common name', fontsize=12)
# Second y-axis sharing the same x-axis, carrying the uncleaned counts as bars.
uncleanAxes = cleanAxes.twinx()
uncleanedRoost.plot(kind='bar', ax=uncleanAxes, ylim=[0, 7000], color='blue', fontsize=15)
uncleanAxes.set_ylabel('Uncleaned Counts', fontsize=20)
# Legend entries are defined here rather than reused from another cell, so the
# labels do not depend on which cell last assigned legendpatch_list.
legendpatch_list = [
    mpatches.Patch(color='red', label='Cleaned Counts'),
    mpatches.Patch(color='blue', label='Uncleaned Counts'),
]
plt.legend(handles=legendpatch_list, loc=(1.1, 0.7), fontsize=20)
plt.title('Uncleaned and cleaned Roost counts compared', fontsize=20)
# Correlation of the unclassified hibernation and roost counts, species by
# species.
#
# value_counts() sorts each series by its own frequencies, so pairing the two
# series by position compares unrelated species (two descending sequences
# always look correlated) and fails outright when the series differ in length.
# Pair them on the commonname index instead, keeping species present in both.
uncleaned_pairs = pd.concat([uncleaned, uncleanedRoost], axis=1, join='inner',
                            keys=['hibernation', 'roost'])
hib_uncleaned = uncleaned_pairs['hibernation']
roost_uncleaned = uncleaned_pairs['roost']

fig = plt.figure(figsize=(10, 8))
# Each marker is one species: x = hibernation count, y = roost count.
plt.scatter(hib_uncleaned, roost_uncleaned, color='red', marker='h', s=50)
axes = plt.gca()
# Least-squares straight line through the paired counts.
m, b = np.polyfit(hib_uncleaned, roost_uncleaned, 1)
# Label the two species furthest from the fitted line, instead of
# hard-coded coordinates.
residuals = (roost_uncleaned - (m * hib_uncleaned + b)).abs()
for species in residuals.nlargest(2).index:
    plt.annotate('Outlier',
                 xy=(hib_uncleaned.loc[species], roost_uncleaned.loc[species]),
                 xytext=(20, 30), textcoords='offset points',
                 arrowprops=dict(facecolor='black', shrink=0.05, width=8))
X_plot = np.linspace(axes.get_xlim()[0], axes.get_xlim()[1], 100)
plt.plot(X_plot, m*X_plot + b, '-', color='black')
plt.title('Correlation of uncleaned Roost and Hibernation Counts', fontsize=15, color='Black')
legendpatch_list = [
    mpatches.Patch(color='red', label='Species (hibernation count, roost count)'),
    mpatches.Patch(color='black', label='Least-squares fit'),
]
plt.legend(handles=legendpatch_list, loc=(0.6, 0.2), fontsize=12)
plt.xlabel('Bat Counts (Hibernation)', fontsize=15)
plt.ylabel('Bat Counts (Roost)', fontsize=15)
# Pearson coefficient and p-value for the species-paired counts.
scipy.stats.pearsonr(hib_uncleaned, roost_uncleaned)
# Correlation of the classified hibernation and roost counts, species by
# species.
#
# value_counts() sorts each series by its own frequencies, so the two series
# must be paired on the commonname index rather than by position; only
# species present in both datasets are kept.
cleaned_pairs = pd.concat([cleaned, cleanedRoost], axis=1, join='inner',
                          keys=['hibernation', 'roost'])
hib_cleaned = cleaned_pairs['hibernation']
roost_cleaned = cleaned_pairs['roost']

fig = plt.figure(figsize=(10, 8))
# Each marker is one species: x = hibernation count, y = roost count.
plt.scatter(hib_cleaned, roost_cleaned, color='red', marker='h', s=50)
axes = plt.gca()
# Least-squares straight line through the paired counts.
m, b = np.polyfit(hib_cleaned, roost_cleaned, 1)
X_plot = np.linspace(axes.get_xlim()[0], axes.get_xlim()[1], 100)
plt.plot(X_plot, m*X_plot + b, '-', color='black')
# Label the two species furthest from the fitted line, instead of
# hard-coded coordinates.
residuals = (roost_cleaned - (m * hib_cleaned + b)).abs()
for species in residuals.nlargest(2).index:
    plt.annotate('Outlier',
                 xy=(hib_cleaned.loc[species], roost_cleaned.loc[species]),
                 xytext=(20, 30), textcoords='offset points',
                 arrowprops=dict(facecolor='black', shrink=0.05, width=8))
plt.title('Correlation of cleaned Roost and Hibernation Counts', fontsize=15, color='Black')
legendpatch_list = [
    mpatches.Patch(color='red', label='Species (hibernation count, roost count)'),
    mpatches.Patch(color='black', label='Least-squares fit'),
]
plt.legend(handles=legendpatch_list, loc=(0.6, 0.2), fontsize=12)
plt.xlabel('Bat Counts (Hibernation)', fontsize=15)
plt.ylabel('Bat Counts (Roost)', fontsize=15)
# Pearson coefficient and p-value for the species-paired counts.
scipy.stats.pearsonr(hib_cleaned, roost_cleaned)
Correlation of Roost cleaned and uncleaned This is misleading because I removed commonname 'Bat' to correlate because the numbers had to be the same. I won't continue with this.
# Unclassified roost records without the generic 'Bat' rows.
unRoost = Uncleaned_Roost_data[Uncleaned_Roost_data.commonname != 'Bat']
unRoost.head()
unRoosts = unRoost.commonname.value_counts()
# Pair cleaned and uncleaned counts on the commonname index. Both series are
# sorted by their own frequencies, so a positional comparison would correlate
# counts of different species.
roost_pairs = pd.concat([cleanedRoost, unRoosts], axis=1, join='inner',
                        keys=['cleaned', 'uncleaned'])
scipy.stats.pearsonr(roost_pairs['cleaned'], roost_pairs['uncleaned'])
# month and year have object dtype; cast both to int.
for _col in ('month', 'year'):
    Roost_data[_col] = Roost_data[_col].astype(int)
Roost_data.dtypes
# Keep the study months (May, June, July) and the years from 1999, when
# better counting began, up to 2013; 2014 is a part year and is excluded.
_roost_study_months = Roost_data['month'].isin([5, 6, 7])
_roost_study_years = Roost_data['year'].between(1999, 2013)
RoostYears_df = Roost_data[_roost_study_months & _roost_study_years]
len(RoostYears_df)
# Reshape to one row per year and one column per commonname for plotting.
_roost_per_year = RoostYears_df.groupby('year').apply(lambda x: x['commonname'].value_counts())
grproost = _roost_per_year.unstack().fillna(0)
grproost.unstack()
grproost.plot(figsize=(14, 8))
These are the combined roost and hibernation counts for the period 1999 - 2013
# Stack the roost and hibernation selections, keeping the column order of
# the roost frame.
Combined_period = [RoostYears_df, SelectYears_df]
Combined_df = pd.concat(Combined_period)[RoostYears_df.columns]
Combined_df.head()
len(Combined_df)
The exploration below is a comparison with the visualisations produced by the Bat Conservation Trust in the 2014 survey, as described above. I will visualise the priority species contained in the PrioritySpecies list and the other species contained in the OtherSpecies list. This is to assess the integrity of the data I have, to ensure it follows similar patterns to those shown in the survey.
# Data downloaded from the JNCC/DEFRA report at http://jncc.defra.gov.uk/page-4271
defra = pd.read_csv('data/Normalised.csv')
defra
# Column names available in the file.
defra.columns

# JNCC/DEFRA series for the priority commonnames.
_pri_columns = ['Year', 'Lesser Horseshoe Bat', 'Soprano Pipistrelle', 'Brown Long-eared Bat', 'Noctule']
pri = defra[_pri_columns]
ax = pri.plot(x='Year', figsize=(15, 12))
ax.set_title('Defra Priority Bat Counts 1998 - 2014', fontsize=20, color='Black')
ax.legend(fontsize=15, loc=2)
ax.set_ylim(100, 250)
ax.set_xlabel('Year', fontsize=20)
ax.set_ylabel('Bat Counts', fontsize=20)

# JNCC/DEFRA series for the remaining species.
_rest_columns = ['Year', 'Common Pipistrelle', 'Serotine', "Natterer's Bat", "Daubenton's Bat"]
rest = defra[_rest_columns]
ax = rest.plot(x='Year', figsize=(15, 12))
ax.set_title('Defra Additional Bat Counts 1998 - 2014', fontsize=20, color='Black')
ax.legend(fontsize=15, loc=2)
ax.set_ylim(100, 250)
ax.set_xlabel('Year', fontsize=20)
ax.set_ylabel('Bat Counts', fontsize=20)
# Rows of the combined data for the species compared with the DEFRA plot.
_priority_names = [
    'Lesser Horseshoe Bat',
    'Greater Horseshoe Bat',
    'Soprano Pipistrelle',
    'Brown Long-eared Bat',
    'Noctule Bat',
]
Priority = Combined_df[Combined_df['commonname'].isin(_priority_names)]
Priority['commonname'].value_counts()
# One row per year, one column per commonname: the shape of the DEFRA data.
_priority_per_year = Priority.groupby('year').apply(lambda x: x['commonname'].value_counts())
priority = _priority_per_year.unstack().fillna(0)
priority.unstack()
# Yearly record counts of the priority species in my data.
_priority_axes = priority.plot(figsize=(15, 12))
_priority_axes.legend(fontsize=15, loc=2)
_priority_axes.set_title('Priority Bat Counts 1999 - 2013', fontsize=20, color='Black')
_priority_axes.set_xlabel('Year', fontsize=20)
_priority_axes.set_ylabel('Bat Counts', fontsize=20)
pd.unique(Combined_df['commonname'])
# Rows of the combined data for the non-priority species.
_other_names = [
    'Common Pipistrelle',
    'Serotine',
    "Natterer's Bat",
    "Daubenton's Bat",
    "Whiskered/Brandt's Bat",
]
Others = Combined_df[Combined_df['commonname'].isin(_other_names)]
Others['commonname'].value_counts()
# One row per year, one column per commonname, ready to plot.
_others_per_year = Others.groupby('year').apply(lambda x: x['commonname'].value_counts())
others = _others_per_year.unstack().fillna(0)
others.unstack()
# Yearly record counts of the other species in my data.
_others_axes = others.plot(figsize=(15, 12))
_others_axes.legend(fontsize=15, loc=2)
_others_axes.set_title('Other Species Bat Counts 1999 - 2013', fontsize=20, color='Black')
_others_axes.set_xlabel('Year', fontsize=20)
_others_axes.set_ylabel('Bat Counts', fontsize=20)
# Export the combined data as CSV so a PostgreSQL table can be created from it.
Combined_df.to_csv('data/CombinedCleanedData.csv', index=False)
pd.unique(Combined_df['commonname'])
# Yearly record counts per commonname for the combined roost and hibernation
# cleaned data.
_combined_per_year = Combined_df.groupby('year').apply(lambda x: x['commonname'].value_counts())
grpComb = _combined_per_year.unstack().fillna(0)
grpComb.unstack()
_combined_axes = grpComb.plot(figsize=(15, 12))
_combined_axes.set_title('Combined Bat Counts 1999 - 2013', fontsize=20, color='Black')
_combined_axes.set_xlabel('Year', fontsize=20)
_combined_axes.set_ylabel('Bat Counts', fontsize=20)
Interestingly the study completed by Defra - http://jncc.defra.gov.uk/page-4271 in December 2015 shows that the bats Daubenton’s bat, common pipistrelle and lesser horseshoe bat have increased in numbers which is supported by the plot above.
I will look at grey long-eared bats in the Roost dataset. These are thought to be in danger of becoming extinct. The Roost dataset covers all months.
# Grey long-eared bat records in the roost data.
_grey_in_roost = Roost_data['commonname'] == 'Grey Long-eared Bat'
GreyBat = Roost_data[_grey_in_roost]
GreyBat
# Grey long-eared bat records in the hibernation data.
_grey_in_hib = Hibernation_data['commonname'] == 'Grey Long-eared Bat'
GreyBat1 = Hibernation_data[_grey_in_hib]
GreyBat1
# Stack both selections, keeping the column order of the roost frame.
greys = (GreyBat, GreyBat1)
GreyBat2 = pd.concat(greys)[GreyBat.columns]
GreyBat2.head()
GreyBat2.columns
#lonelybats = lonelybat[['latitude','longitude','commonname']]
#lonelybats.reset_index()
#lonelybats
The format of the dataframe created indexing issues, so I created a dataframe and wrote it to csv. Running the maps was causing the kernel to crash, hence I created a 'Map' notebook for this purpose.
# Earlier hand-built alternative, kept for reference (not executed):
#df2 = pd.DataFrame({'Latitude' : [49.890102,51.049767],
# 'Longitude' : [-3.5327096,0.13863303],
# 'commonname' : ['Grey Long-eared Bat', 'Grey Long-eared Bat']})
#df2
# Write the combined grey long-eared bat records to CSV, without the index column.
GreyBat2.to_csv('data/greybats.csv', index = False)
I didn't have enough data on grey long-eared bats to show anything significant, so after mapping I abandoned this quest.
'The following represents an assessment of the weather experienced across the UK during winter 2010 / 2011 (December 2010 to February 2011) and how it compares with the 1981 to 2010 averages.
Mean temperatures over the UK were 4.8 °C below average during December, 0.6 °C below average in January and 1.7 °C above average in February. The UK mean temperature for the winter as a whole was 2.4 °C, making it less cold than winter 2009/10 which was 1.6 °C but still the second-coldest winter since 1985/86 with 2.3 °C. Over Scotland and Northern Ireland it was the second-coldest winter since 1985/86 and 1978/79 respectively, with again only last winter having been colder. Over Northern Ireland it was equal sixth-coldest winter in the series from 1910. Over England and Wales it was the second-coldest since 1995/96, with only last winter having been colder.' Met office records - http://www.metoffice.gov.uk/climate/uk
I have taken 2010 as an example because there was a peak here and because 2010 was a cold winter, I thought this was strange.
# Colour sequence shared by the two 2010 bar charts; it is cycled so that
# there is one colour per bar.
_BAR_PALETTE = ['dodgerblue', 'cornflowerblue', 'skyblue', 'slateblue', '#191970', '#001CF0', '#0038E2',
                '#0055D4', '#0071C6', '#008DB8', '#00AAAA',
                '#00C69C', '#00E28E', '#00FF80']

# Hibernation records for January and February 2010.
_hib_2010_rows = Hibernation_data['month'].isin([1, 2]) & (Hibernation_data['year'] == 2010)
One_year = Hibernation_data[_hib_2010_rows]
One_year_grp = One_year.groupby('year').apply(lambda x: x['commonname'].value_counts()).unstack().fillna(0)
pd.unique(One_year['commonname'])
# Records per commonname, used for the comparison.
new = One_year['commonname'].value_counts()
# Bar chart of the 2010 hibernation counts.
my_colors = list(islice(cycle(_BAR_PALETTE), len(new)))
ax = new.plot(kind='bar', figsize=(14, 8), color=my_colors, fontsize=12)
ax.set_title('Hibernation Bat Counts 2010', fontsize=20)
ax.set_xlabel('Bat Types (Common Name)', fontsize=15)
ax.set_ylabel('Total Count', fontsize=15)

# Roost records for May, June and July 2010.
_roost_2010_rows = Roost_data['month'].isin([5, 6, 7]) & (Roost_data['year'] == 2010)
RoostSelectYear_df = Roost_data[_roost_2010_rows]
Roost_Year_grp = RoostSelectYear_df.groupby('year').apply(lambda x: x['commonname'].value_counts()).unstack().fillna(0)
Roost_Year_grp.unstack()
Roost_Year_grp.reset_index()
# Records per commonname.
newroost = RoostSelectYear_df['commonname'].value_counts()
newroost
# Bar chart of the 2010 roost counts.
my_colors = list(islice(cycle(_BAR_PALETTE), len(newroost)))
ax = newroost.plot(kind='bar', figsize=(14, 8), color=my_colors, fontsize=12)
ax.set_title('Roost Bat Counts 2010', fontsize=20)
ax.set_xlabel('Bat Types (Common Name)', fontsize=15)
ax.set_ylabel('Total Count', fontsize=15)
The Bat Conservation Trust reports that there is a disparity in Pipistrelle counts between the hibernation period and the roost period, and the values certainly support this. Further work could involve classifying the bats in the areas where Pipistrelle are seen in the roost months to try to determine whether the problem lies with classification - perhaps Pipistrelle are more difficult to identify during hibernation.
# Wrap the grouped 2010 counts in DataFrames so they can be reshaped.
roostmerge = pd.DataFrame(Roost_Year_grp)
hibmerge = pd.DataFrame(One_year_grp)

# Hibernation: flatten the index into columns and name the value column.
hibmerge.unstack()
hibmerge1 = hibmerge.reset_index()
hibmerge1.columns
hibmerge1 = hibmerge1.rename(columns={0: 'count'})
hibmerge1.head()

# Roost: the same reshaping.
roostmerge.unstack()
roostmerge1 = roostmerge.reset_index()
roostmerge1.columns
roostmerge1 = roostmerge1.rename(columns={0: 'count'})
roostmerge1.columns
Add in a plot where it shows both the hibernation dataset and the roost dataset on the same plot and do the same for the whole period if possible.
# Overlay the 2010 hibernation and roost counts on twinned y-axes.
#
# new and newroost both come from value_counts(), so each is sorted by its
# own frequencies and may contain different species. Plotted as they are, the
# two lines are matched by rank rather than by species, which makes them look
# alike regardless of the data. Re-index both to one shared species order
# (a species missing from a dataset is shown as 0).
species_2010 = new.index.union(newroost.index)
hib_2010 = new.reindex(species_2010, fill_value=0)
roost_2010 = newroost.reindex(species_2010, fill_value=0)

fig = plt.figure()
compare = fig.add_subplot(111)
hib_2010.plot(ax=compare, color='red', fontsize=18, figsize=(15, 10), ylim=[0, 350])
plt.title('Total Bats 2010', fontsize=25, color='Black')
plt.xlabel('Bat Name', fontsize=20)
plt.ylabel('Total Hibernation Count', fontsize=20)
# Second y-axis on the same x-axis for the roost counts.
comparison = compare.twinx()
roost_2010.plot(ax=comparison, color='blue', fontsize=18, ylim=[0, 350])
comparison.set_ylabel('Total Roost Count', fontsize=20)
# Proxy patches for the legend entries.
patch_list = [
    mpatches.Patch(color='red', label='Hibernation Counts'),
    mpatches.Patch(color='blue', label='Roost Counts'),
]
plt.legend(handles=patch_list, loc=(0.6, 0.2), fontsize=20)
# This plot is not quite as good as the two bar plots. I wonder if perhaps one
# with circles would be better. If there's time, I'll try it.
Checking the counts of pipistrelle for roost and hibernation to see how much they differ.
# 'Pipistrelle' records in the 2010 hibernation selection.
pip = One_year.loc[One_year['commonname'].eq('Pipistrelle')]
len(pip)
# 'Pipistrelle' records in the 2010 roost selection.
pip1 = RoostSelectYear_df.loc[RoostSelectYear_df['commonname'].eq('Pipistrelle')]
len(pip1)
Counting the hibernation and roost datasets to produce a correlation plot.
# count holds the 2010 roost records per commonname, count1 the 2010
# hibernation records per commonname.
count = RoostSelectYear_df.commonname.value_counts()
count1 = One_year.commonname.value_counts()
count
I removed the values that were in the Roost dataset but not in the hibernation dataset or vice versa for 2010. This is not concerning in any way, these species appear in other years and appears to be due to counting methods and not bat numbers. However I will check other years and if a pattern emerges with the same names appearing I will perform further analysis. I have checked the documentation from the Bat Conservation Trust and the bats removed are not specified in the Roost study.
# Distinct commonnames in the 2010 roost and hibernation selections.
Roost = list(pd.unique(RoostSelectYear_df.commonname))
Roost
One = list(pd.unique(One_year.commonname))
One
# Commonnames in the hibernation selection that are absent from the roost one.
isin = [v for v in One if v not in Roost]
isin
# Commonnames in the roost selection that are absent from the hibernation one.
# The original loop appended these to isin by mistake, leaving isn always empty.
isn = [v for v in Roost if v not in One]
isn
# Remove the roost rows for the species causing the mismatch.
_bechstein_rows = RoostSelectYear_df.index[(RoostSelectYear_df['commonname'] == "Bechstein's Bat").values]
RoostSelectYear_df = RoostSelectYear_df.drop(_bechstein_rows)
pd.unique(RoostSelectYear_df['commonname'])
# Remove the hibernation rows for the species causing the mismatch.
_noctule_rows = One_year.index[(One_year['commonname'] == 'Noctule Bat').values]
One_year = One_year.drop(_noctule_rows)
pd.unique(One_year['commonname'])
Merging the 2010 datasets for roost and hibernation to plot together
# 2010 hibernation records per commonname as a two-column frame
# (commonname, HibernationCount).
# Built directly from One_year (the hibernation selection): the previous code
# used `count`, which holds the roost counts, so the column was mislabelled.
# rename_axis/reset_index(name=...) names both columns explicitly instead of
# relying on the name pandas gives the value_counts() result, which differs
# between pandas versions.
new_hib = (
    One_year['commonname']
    .value_counts()
    .rename_axis('commonname')
    .reset_index(name='HibernationCount')
)
new_hib.columns
new_hib
rename the count columns to hib_count and roost_count
# 2010 roost records per commonname as a two-column frame
# (commonname, RoostCount).
# Built directly from RoostSelectYear_df (the roost selection): the previous
# code used `count1`, which holds the hibernation counts, so the column was
# mislabelled. rename_axis/reset_index(name=...) names both columns explicitly
# instead of relying on the name pandas gives the value_counts() result.
new_roost = (
    RoostSelectYear_df['commonname']
    .value_counts()
    .rename_axis('commonname')
    .reset_index(name='RoostCount')
)
new_roost.columns
# Inner join: only commonnames present in both frames are kept.
combined = pd.merge(new_hib, new_roost, on=['commonname'])
combined
Make these plots better and show the comparison between the correlated plot and the garbage plot to show that identification may be the issue.
# One point per commonname: hibernation count against roost count.
combined.plot(kind='scatter', x='HibernationCount', y='RoostCount')
The survey data is taken from the included report from The Bat Conservation Trust. It can be seen that there is little similarity between the commonnames listed and the ones in the datasets.
# Survey table read from CSV.
survey = pd.read_csv('data/surveys.csv')
survey
# Join the 2010 counts to the survey rows on commonname, then remove the row
# labelled 7.
all_merge = combined.merge(survey, on='commonname')
all_merge = all_merge.drop([7])
all_merge
As can be seen from the table, the roost and hibernation counts have little in common, the commonnames are different for each and there are names in the data which are apparently not in the surveys.
# One bar subplot per count column of the combined 2010 frame.
combined.plot(kind='bar', x='commonname', subplots=True, color=['blue', 'red'],
              figsize=(15, 10), fontsize=12)
plt.xlabel('Common Name', fontsize=18)
plt.ylabel('Total Count', fontsize=18)
# Correlation between the 2010 hibernation and roost counts, species by
# species.
#
# The counts are recomputed from the current selections and paired on the
# commonname index. value_counts() sorts each series by its own frequencies,
# so pairing by position compares unrelated species, and `count`/`count1`
# were computed before the mismatching species were dropped.
hib_2010_counts = One_year['commonname'].value_counts()
roost_2010_counts = RoostSelectYear_df['commonname'].value_counts()
pairs_2010 = pd.concat([hib_2010_counts, roost_2010_counts], axis=1, join='inner',
                       keys=['hibernation', 'roost'])
hib_2010_paired = pairs_2010['hibernation']
roost_2010_paired = pairs_2010['roost']

fig = plt.figure(figsize=(12, 10))
axes = plt.gca()
# Apply the limits to this figure's axes; the previous code set them on `ax`,
# which still referred to an earlier plot.
axes.set_xlim(0, 400)
axes.set_ylim(0, 400)
# Each marker is one species: x = hibernation count, y = roost count.
plt.scatter(hib_2010_paired, roost_2010_paired, color='red', marker='h', s=50)
# Hide grid lines (the previous attribute assignment on plt.grid had no effect).
axes.grid(False)
# Least-squares straight line through the paired counts.
m, b = np.polyfit(hib_2010_paired, roost_2010_paired, 1)
X_plot = np.linspace(axes.get_xlim()[0], axes.get_xlim()[1], 100)
plt.plot(X_plot, m*X_plot + b, '-', color='black')
plt.title('Correlation of Roost and Hibernation Counts 2010', fontsize=20, color='Black')
legendpatch_list = [
    mpatches.Patch(color='red', label='Species (hibernation count, roost count)'),
    mpatches.Patch(color='black', label='Least-squares fit'),
]
plt.legend(handles=legendpatch_list, loc=(0.6, 0.2), fontsize=15)
plt.xlabel('Bat Counts (Hibernation)', fontsize=15)
plt.ylabel('Bat Counts (Roost)', fontsize=15)
# Pearson coefficient and p-value for the species-paired counts.
scipy.stats.pearsonr(hib_2010_paired, roost_2010_paired)
I'm unsure of these results and will revisit this.
# Records per commonname in the classified hibernation data (all years).
unsortedh = Hibernation_data.commonname.value_counts()
unsortedh
# Records per commonname in the classified roost data (all years).
unsortedr = Roost_data.commonname.value_counts()
unsortedr
# Pair the counts on the commonname index before correlating. Both series are
# sorted by their own frequencies, so a positional comparison would correlate
# counts of different species and fail if the series differ in length.
all_pairs = pd.concat([unsortedh, unsortedr], axis=1, join='inner',
                      keys=['hibernation', 'roost'])
scipy.stats.pearsonr(all_pairs['hibernation'], all_pairs['roost'])
'Pearson’s R2 is a measure of correlation between two or more continuous variables. It can be visualised as drawing a line of best fit on a scatter plot. The R2 coefficient value indicates how well the values correlate. A value near +1 shows that the values vary together, while a value near −1 shows that they vary in opposite directions. A value near zero shows that there’s little correlation between the data. We generally need at least 50 data items before this measure can give sensible results. In Python, Pearson’s R2 is calculated by the scipy.stats.pearsonr method. This takes two series of data, corresponding to the x and y values, and returns R2 and a p value. The p value can be used to decide whether to reject the null hypothesis of no correlation. However, a naive application of this test can be misleading. The data should be examined first to see if attempts to fit a straight line to the data are sensible. Furthermore, R2 can give confused results if the values being correlated have very different ranges. In these cases, it can be useful to normalise the data by scaling each variable to fall into the range 0–1.
-If the p-value is low (generally less than 0.05), then your correlation is statistically significant, and we can determine that there is correlation between the datasets.' The Open University
Combine Roost and Hibernation Data after this to look at the period from 1999 - 2013 overall. This will help to rule out incidental apparent correlation results.
# Hibernation records per commonname for the 1999-2013 selection.
# The result has two columns: commonname and Count (rows with a non-null year).
# groupby(...).count().reset_index(name=...) replaces
# pivot_table + reset_index(['year', 'commonname']): 'year' is not an index
# level of the pivoted frame, and pandas versions that validate the level
# argument raise KeyError on it.
SelectYear = SelectYears_df[['commonname', 'year']]
pivoted = SelectYear.groupby('commonname')['year'].count().reset_index(name='Count')
pivoted.head(20)
# Roost records per commonname for the 1999-2013 selection, same shape.
SelectYear1 = RoostYears_df[['commonname', 'year']]
pivoted1 = SelectYear1.groupby('commonname')['year'].count().reset_index(name='Count')
pivoted1.head(20)
This result shows a small, positive correlation with a high p value, so there's not much correlation as can be seen on the plot below. This is the expected result for the period and it strengthens the result for the correlation of the hibernation and roost counts for the selected year of 2010.
# Comparison of hibernation and roost counts per species, 1999-2013.
#
# pivoted holds the hibernation counts and pivoted1 the roost counts. The
# previous legend labelled them the other way round, and the y tick labels
# were taken from pivoted1 only, so bars could be labelled with the wrong
# species when the two frames list different species. Join the frames on
# commonname (a species missing from one dataset counts as 0) and draw
# grouped bars from the joined frame.
period_counts = (
    pd.merge(pivoted, pivoted1, on='commonname', how='outer',
             suffixes=('_hibernation', '_roost'))
    .fillna(0)
    .set_index('commonname')
)
ax = period_counts.plot.barh(y=['Count_hibernation', 'Count_roost'],
                             color=['blue', 'red'], figsize=(12, 8))
fig = ax.get_figure()
legendpatch_list = [
    mpatches.Patch(color='blue', label='1999 - 2013 Hibernation Counts'),
    mpatches.Patch(color='red', label='1999 - 2013 Roost Counts'),
]
plt.legend(handles=legendpatch_list, loc=(0.7, 0.5), fontsize=12)
# Total combined (roost + hibernation) records per commonname, 1999-2013.
# The previous code ended with `totals = pivoted1.rename(...)`, which replaced
# the combined totals with the roost-only counts. The totals are now computed
# from Combined_df and kept.
totals = Combined_df.groupby('commonname')['year'].count().reset_index(name='Count')
totals
# Bar chart of the totals, labelled with the commonnames.
ax = totals.plot(kind='bar')
ax.set_xticklabels(list(totals.commonname))
plt.title('Total Counts 1999-2013', fontsize=20, color='Blue')
plt.xlabel('Bat Types (Common Name)')
plt.ylabel('Total Count')
Selecting one year of data to compare to one year of Hibernation data
Looking at the extra months data for 2010 to review extra counts within unexpected months.
# All 2010 hibernation records, whatever the month.
One_year_extra = Hibernation_data.loc[Hibernation_data['year'].eq(2010)]
One_year_extra.head()
# All 2010 roost records, whatever the month.
Roost_year_extra = Roost_data.loc[Roost_data['year'].eq(2010)]
Roost_year_extra.head()
# 2010 hibernation data, January and February.
_hib_month = One_year_extra['month']
winter_months_df = One_year_extra.loc[_hib_month.isin([1, 2])]
winter_months_df.head()
# Records per commonname in those months.
winter_months_df['commonname'].value_counts()
# 2010 hibernation data recorded in the summer months (May to July).
summerhib_months_df = One_year_extra.loc[_hib_month.isin([5, 6, 7])]
len(summerhib_months_df)
summerhib_months_df['commonname'].value_counts()
# Remaining months, where there should be no hibernating bats.
extrahib_months_df = One_year_extra.loc[_hib_month.isin([3, 4, 8, 9, 10, 11, 12])]
len(extrahib_months_df)
# Records per commonname in the months where bats shouldn't hibernate.
extrahib_months_df['commonname'].value_counts()
# 2010 roost data in the reported roosting months (May to July).
_roost_month = Roost_year_extra['month']
summer_months_df = Roost_year_extra.loc[_roost_month.isin([5, 6, 7])]
summer_months_df.head()
summer_months_df['commonname'].value_counts()
# 2010 roost data recorded in the hibernation months (January, February).
winterroost_months_df = Roost_year_extra.loc[_roost_month.isin([1, 2])]
len(winterroost_months_df)
winterroost_months_df['commonname'].value_counts()
# 2010 roost data in months that are neither roost nor hibernation months.
extraroost_months_df = Roost_year_extra.loc[_roost_month.isin([3, 4, 8, 9, 10, 11, 12])]
print(len(extraroost_months_df), len(extrahib_months_df))
# A similar result
# Experimenting with grouping to get a suitable format for extraroost_months_df.
groupeddata6 = extraroost_months_df.groupby(['commonname'])
grouped6 = groupeddata6['commonname'].count()
groupeddata6.size()
# Remove "Whiskered Bat": it only appears in the roost counts, in very small
# numbers.
_whiskered_rows = extraroost_months_df.index[(extraroost_months_df['commonname'] == "Whiskered Bat").values]
extraroost_months_df = extraroost_months_df.drop(_whiskered_rows)
# Records per commonname in the extra roost months.
extrar = extraroost_months_df['commonname'].value_counts()
extrar
groupeddata7 = extrahib_months_df.groupby(['commonname'])
grouped7 = groupeddata7['commonname'].aggregate('count')
groupeddata7.size()
#experimenting with grouping to try to get a sutable format extrahib_months_df.
#getting counts to compare
extrah=extrahib_months_df.commonname.value_counts()
extrah
Using correlation to see the relationship between the extra months
# value_counts() orders each Series by its own frequencies and pearsonr pairs values
# purely by position, so the two Series are first matched on species (their index);
# only species present in both are kept.
extrar_by_species, extrah_by_species = extrar.align(extrah, join='inner')
scipy.stats.pearsonr(extrar_by_species, extrah_by_species)
The outliers in the plot below are not statistically significant, as the Pearson coefficient shows: the plot has good correlation, with a value of r close to 1 and a small p-value.
# Scatter of the 2010 "extra month" counts with a least-squares trend line.
# NOTE(review): extrar and extrah come from value_counts(), so each is sorted by its own
# counts; scatter/polyfit pair them by position, not by species - confirm this is intended.
fig = plt.figure(figsize=(12, 10))
plt.scatter(extrar, extrah, color=['red', 'blue'], marker='h', s=50)
axes = plt.gca()
# Slope and intercept of the first-degree fit
m, b = np.polyfit(extrar, extrah, 1)
plt.annotate('Outlier', xy=(20, 58), xytext=(22, 70),
             arrowprops=dict(facecolor='black', shrink=0.05, width=8))
plt.annotate('Outlier', xy=(33, 62), xytext=(37, 74),
             arrowprops=dict(facecolor='black', shrink=0.05, width=8))
# Draw the fitted line across the current x-range of the axes
X_plot = np.linspace(axes.get_xlim()[0], axes.get_xlim()[1], 100)
plt.plot(X_plot, m*X_plot + b, '-', color='black')
plt.title('Correlation of Extra Roost and Hibernation Counts 2010', fontsize=20, color='Black')
legendpatch_list = [mpatches.Patch(color='red', label='2010 Extra Roost Counts'),
                    mpatches.Patch(color='blue', label='2010 Extra Hibernation Counts')]
plt.legend(handles=legendpatch_list, loc=(0.6, 0.2), fontsize=12)
# extrar (roost) is plotted on the x-axis and extrah (hibernation) on the y-axis,
# so the axis labels follow that order.
plt.xlabel('Bat Counts (Roost)', fontsize=15)
plt.ylabel('Bat Counts (Hibernation)', fontsize=15)
#We can see a strong correlation between the roost and hibernation datasets however I will look at the outliers
#The outliers can be explained because the types of bats counted in the roost and hibernation datasets are different and the ones
#covering the extra months are likely to be more inconsistent than the official counts.
#Overall the pattern is good.
# Turn each value_counts() Series into a tidy (commonname, count) frame and join the two
# on species. rename_axis/reset_index(name=...) names both columns explicitly, so the
# result does not depend on what name value_counts() gives its output (this changed in
# pandas 2.0), and the discarded no-op reset_index() calls are gone.
extra_hib = extrah.rename_axis('commonname').reset_index(name='HibernationCount')
roost_e = extrar.rename_axis('commonname').reset_index(name='RoostCount')
# Inner join: only species present in both datasets are kept
whole_extra = pd.merge(extra_hib, roost_e, on=['commonname'])
# Colour palette cycled to one entry per row of whole_extra
from itertools import cycle, islice
e_colors = list(islice(
    cycle(['dodgerblue', 'cornflowerblue', 'skyblue', 'slateblue', '#00E28E', '#008DB8',
           '#00FF80', '#00C69C', 'red', 'blue', 'green', 'cyan', 'yellow']),
    len(whole_extra)))
# One bar subplot per count column, species along the x-axis
whole_extra.plot(kind='bar', x='commonname', subplots=True, color=['blue', 'red'],
                 figsize=(15, 10), fontsize=12)
plt.xlabel('Common Name', fontsize=18)
plt.ylabel('Total Count', fontsize=18)
# Per-species difference (pandas aligns the two Series on their index)
diff1 = extrar - extrah
# Histogram of count differences which follow a normal distribution as expected
diff1.plot.hist(bins=10, figsize=(10, 8))
Looking at Pipistrelles to try to find the missing ones; hibernation months can cover 11, 12, 1 and 2. Looking at 1999-2013.
# Further Pipistrelle exploration: where do they go in winter?
# Roost records falling in the hibernation months (Nov-Feb)
pip_df = Roost_data.loc[Roost_data['month'].isin([1, 2, 11, 12])]
# Do they end up in the roost dataset by mistake?
pip = pip_df.loc[pip_df['commonname'] == 'Pipistrelle']
len(pip)
# How many are in the hibernation dataset for the same months?
pip_df1 = Hibernation_data.loc[Hibernation_data['month'].isin([1, 2, 11, 12])]
# Compare with the roost figure above
pip1 = pip_df1.loc[pip_df1['commonname'] == 'Pipistrelle']
len(pip1)
There seem to be quite a lot of Pipistrelles in the Roost data, so will look at 2010
# Pipistrelle records in the hibernation months (Nov-Feb) of the selected year: roost data
pip_df10 = RoostSelectYear_df.loc[RoostSelectYear_df['month'].isin([1, 2, 11, 12])]
pip10 = pip_df10.loc[pip_df10['commonname'] == 'Pipistrelle']
len(pip10)
# Same selection from the hibernation data
piph_df10 = One_year.loc[One_year['month'].isin([1, 2, 11, 12])]
piph10 = piph_df10.loc[piph_df10['commonname'] == 'Pipistrelle']
len(piph10)
It would appear that over one year there is not too much of an issue but over the whole period there are 734 Pipistrelles in the Roost dataset which should be in the Hibernation one.
Looking at the overall counts for 1999 - 2013
# Hibernation data: number of records per species (trying out groupby as a format)
groupeddata = Hibernation_data.groupby(['commonname'])
grouped = groupeddata['commonname'].count()
groupeddata.size()
# Column sums per species
totals = groupeddata.sum()
# Bar chart of all hibernation records; the palette is cycled to len(new) entries
my_colors = list(islice(
    cycle(['dodgerblue', 'cornflowerblue', 'skyblue', 'slateblue', '#191970', '#001CF0',
           '#0038E2', '#0055D4', '#0071C6', '#008DB8', '#00AAAA', '#00C69C', '#00E28E',
           '#00FF80']),
    len(new)))
grouped.plot.bar(color=my_colors, figsize=(20, 8), fontsize=12)
plt.title('Total Hibernating Bats 1999 - 2013', fontsize=20, color='Black')
plt.xlabel('Bat Name', fontsize=15)
plt.ylabel('Total Count', fontsize=15)
# Roost data: number of records per species
groupeddata1 = Roost_data.groupby(['commonname'])
grouped1 = groupeddata1['commonname'].count()
groupeddata1.size()
# Bar chart of all roost records (drawn with the my_colors value already in scope)
grouped1.plot.bar(color=my_colors, figsize=(20, 8), fontsize=12)
# The palette is then rebuilt with the same recipe
my_colors = list(islice(
    cycle(['dodgerblue', 'cornflowerblue', 'skyblue', 'slateblue', '#191970', '#001CF0',
           '#0038E2', '#0055D4', '#0071C6', '#008DB8', '#00AAAA', '#00C69C', '#00E28E',
           '#00FF80']),
    len(new)))
plt.title('Total Roost Bats 1999 -2013', fontsize=20, color='Black')
plt.xlabel('Bat Name', fontsize=15)
plt.ylabel('Total Count', fontsize=15)
# Per-species record counts for the whole roost dataset
roosts = Roost_data['commonname'].value_counts()
roosts
# Per-species record counts for the whole hibernation dataset
hibs = Hibernation_data['commonname'].value_counts()
hibs
# Correlation between roost and hibernation counts per species.
# value_counts() sorts each Series by its own counts and pearsonr pairs by position, so
# the Series are matched on species (index) first; species missing from either are dropped.
roosts_by_species, hibs_by_species = roosts.align(hibs, join='inner')
scipy.stats.pearsonr(roosts_by_species, hibs_by_species)
# Split both full datasets into winter (Jan-Feb) and summer (May-Jul) records
winterhib_months_df = Hibernation_data.loc[Hibernation_data['month'].isin([1, 2])]
winterroost_months_df = Roost_data.loc[Roost_data['month'].isin([1, 2])]
summerhib_months_df = Hibernation_data.loc[Hibernation_data['month'].isin([5, 6, 7])]
summerroost_months_df = Roost_data.loc[Roost_data['month'].isin([5, 6, 7])]
# Stack the winter months of the roost dataset on top of those of the hibernation
# dataset, keeping the roost frame's columns (in its order)
winter_months = [winterroost_months_df, winterhib_months_df]
winter_df = pd.concat(winter_months)
winter_df = winter_df[winterroost_months_df.columns]
len(winter_df)
# Records per species in the 'correct' winter months
groupeddata2 = winter_df.groupby(['commonname'])
grouped2 = groupeddata2['commonname'].count()
groupeddata2.size()
# Bar chart of winter records (drawn with the my_colors value already in scope)
grouped2.plot.bar(color=my_colors, figsize=(20, 8), fontsize=12)
my_colors = list(islice(
    cycle(['dodgerblue', 'cornflowerblue', 'skyblue', 'slateblue', '#191970', '#001CF0',
           '#0038E2', '#0055D4', '#0071C6', '#008DB8', '#00AAAA', '#00C69C', '#00E28E',
           '#00FF80']),
    len(new)))
plt.title('Total Winter Bats 1999 -2013', fontsize=20, color='Black')
plt.xlabel('Bat Name', fontsize=15)
plt.ylabel('Total Count', fontsize=15)
# Stack the summer months of the roost dataset on top of those of the hibernation
# dataset, keeping the roost frame's columns (in its order)
summer_months = [summerroost_months_df, summerhib_months_df]
summer_df = pd.concat(summer_months)
summer_df = summer_df[summerroost_months_df.columns]
len(summer_df)
# Records per species in the 'correct' summer months
groupeddata3 = summer_df.groupby(['commonname'])
grouped3 = groupeddata3['commonname'].count()
groupeddata3.size()
# Bar chart of summer records (drawn with the my_colors value already in scope)
grouped3.plot.bar(color=my_colors, figsize=(20, 8), fontsize=12)
my_colors = list(islice(
    cycle(['dodgerblue', 'cornflowerblue', 'skyblue', 'slateblue', '#191970', '#001CF0',
           '#0038E2', '#0055D4', '#0071C6', '#008DB8', '#00AAAA', '#00C69C', '#00E28E',
           '#00FF80']),
    len(new)))
plt.title('Total Summer Bats 1999 -2013', fontsize=20, color='Black')
plt.xlabel('Bat Name', fontsize=15)
plt.ylabel('Total Count', fontsize=15)
# Roost counts (bars) against the combined summer counts (line) on twinned axes
fig = plt.figure(figsize=(20, 10))
# Single subplot: a 1 x 1 grid, first cell
SummerAxes = fig.add_subplot(1, 1, 1)
grouped1.plot.bar(ax=SummerAxes, ylim=[0, 7000], color='red', fontsize=15)
SummerAxes.set_ylabel('Roost Counts', fontsize=20)
# Second axes drawn over the same area, sharing the x-axis with SummerAxes (twinx)
ExtrasAxes = SummerAxes.twinx()
grouped3.plot(kind='line', ax=ExtrasAxes, ylim=[0, 7000], color='blue', fontsize=15)
ExtrasAxes.set_ylabel('Extra summmer Counts', fontsize=20)
# Patch objects act as legend handles, one entry per colour
import matplotlib.patches as mpatches
legendpatch_list = [mpatches.Patch(color='red', label='Roost Counts'),
                    mpatches.Patch(color='blue', label='Extra summer Counts')]
# loc is given as fractions of the axes; an x above 1 puts the legend outside the plot
plt.legend(handles=legendpatch_list, loc=(1.1, 0.7), fontsize=20)
plt.title('Roost and Roost with extra summer counts compared', fontsize=20)
#It would seem that there is a very similar pattern in the Roost data for the original data and the data selected for the correct
#months combined with the hibernation summer months data
# Hibernation counts (bars) against the combined winter counts (line) on twinned axes
fig = plt.figure(figsize=(20, 10))
# Single subplot: a 1 x 1 grid, first cell
WinterAxes = fig.add_subplot(1, 1, 1)
grouped.plot.bar(ax=WinterAxes, ylim=[0, 7000], color='yellow', fontsize=15)
WinterAxes.set_ylabel('Total Hibernation Counts', fontsize=20)
# Second axes drawn over the same area, sharing the x-axis with WinterAxes (twinx)
ExtraAxes = WinterAxes.twinx()
grouped2.plot(kind='line', ax=ExtraAxes, ylim=[0, 7000], color='green', fontsize=15)
ExtraAxes.set_ylabel('Total winter Counts', fontsize=20)
# Patch objects act as legend handles, one entry per colour
import matplotlib.patches as mpatches
legendpatch_list = [mpatches.Patch(color='yellow', label='Total Hibernation Counts'),
                    mpatches.Patch(color='green', label='Total winter Counts')]
# loc is given as fractions of the axes; an x above 1 puts the legend outside the plot
plt.legend(handles=legendpatch_list, loc=(1.1, 0.7), fontsize=20)
plt.title('Hibernation and extra winter counts compared', fontsize=20)
#It would seem that there is a similar pattern in the Hibernation data for the original data and the data selected for the correct
#months combined with the roost winter months data
# Per-species record counts for the combined summer frame
summer = summer_df['commonname'].value_counts()
summer
# Per-species record counts for the combined winter frame
winter = winter_df['commonname'].value_counts()
winter
Using merge to have both on the same plot for the sorted hib and roost months and will rename the count columns
# Turn the winter/summer value_counts() Series into tidy (commonname, count) frames and
# join them on species. rename_axis/reset_index(name=...) names both columns explicitly,
# so the result does not depend on what name value_counts() gives its output (this
# changed in pandas 2.0), and the discarded no-op reset_index() calls are gone.
whole_hib = winter.rename_axis('commonname').reset_index(name='HibernationCount')
whole_roost = summer.rename_axis('commonname').reset_index(name='RoostCount')
# Inner join: only species present in both frames are kept
whole_merged = pd.merge(whole_hib, whole_roost, on=['commonname'])
whole_merged.head()
# Colour palette cycled to one entry per row of whole_merged
from itertools import cycle, islice
merge_colors = list(islice(
    cycle(['dodgerblue', 'cornflowerblue', 'skyblue', 'slateblue', '#191970', '#001CF0',
           '#0038E2', '#0055D4', '#0071C6', '#008DB8', '#00AAAA', '#00C69C', '#00E28E',
           '#00FF80']),
    len(whole_merged)))
# One bar subplot per count column, species along the x-axis
whole_merged.plot(kind='bar', x='commonname', subplots=True, color=['blue', 'red'],
                  figsize=(16, 10), fontsize=12)
plt.xlabel('Common Name', fontsize=18)
plt.ylabel('Total Count', fontsize=18)
# Winter counts (bars) against summer counts (line) on twinned axes
fig = plt.figure(figsize=(20, 10))
# Single subplot: a 1 x 1 grid, first cell
TheAxes = fig.add_subplot(1, 1, 1)
grouped2.plot.bar(ax=TheAxes, ylim=[0, 5000], color='blue', fontsize=15)
TheAxes.set_ylabel('Total Winter Counts', fontsize=20)
TheAxes.set_xlabel('Common Name', fontsize=20)
# Second axes drawn over the same area, sharing the x-axis with TheAxes (twinx)
OtherAxes = TheAxes.twinx()
grouped3.plot(kind='line', ax=OtherAxes, ylim=[0, 5000], color='red', fontsize=15)
OtherAxes.set_ylabel('Total Summer Counts', fontsize=20)
# Patch objects act as legend handles, one entry per colour
import matplotlib.patches as mpatches
legendpatch_list = [mpatches.Patch(color='blue', label='Total Winter Counts'),
                    mpatches.Patch(color='red', label='Total Summer Counts')]
# loc is given as fractions of the axes; an x above 1 puts the legend outside the plot
plt.legend(handles=legendpatch_list, loc=(1.1, 0.7), fontsize=20)
plt.title('Winter and Summer counts compared 1999-2013', fontsize=20)
#Here we start to see a real difference in counts which takes me to my Question 2 which is comparing the numbers of hibernating
#bats with the numbers of roosting bats.
# value_counts() orders each Series by its own frequencies and pearsonr pairs values
# purely by position, so summer and winter are first matched on species (their index);
# only species present in both are kept.
summer_by_species, winter_by_species = summer.align(winter, join='inner')
scipy.stats.pearsonr(summer_by_species, winter_by_species)
# Scatter of winter (x) against summer (y) counts with a least-squares trend line.
# NOTE(review): winter and summer come from value_counts(), so each is sorted by its own
# counts; scatter/polyfit pair them by position, not by species - confirm this is intended.
fig = plt.figure(figsize=(12, 10))
plt.scatter(winter, summer, color=['red', 'blue'], marker='h', s=50)
axes = plt.gca()
# Slope and intercept of the first-degree fit
m, b = np.polyfit(winter, summer, 1)
plt.annotate('Outlier', xy=(2950, 3000), xytext=(3100, 3200),
             arrowprops=dict(facecolor='black', shrink=0.05, width=8))
plt.annotate('Outlier', xy=(3100, 4500), xytext=(3200, 4800),
             arrowprops=dict(facecolor='black', shrink=0.05, width=8))
# Draw the fitted line across the current x-range of the axes
X_plot = np.linspace(axes.get_xlim()[0], axes.get_xlim()[1], 100)
plt.plot(X_plot, m*X_plot + b, '-', color='black')
plt.title('Correlation of Roost and Hibernation Counts 1999-2013', fontsize=20, color='Black')
legendpatch_list = [mpatches.Patch(color='red', label='Roost Counts'),
                    mpatches.Patch(color='blue', label='Hibernation Counts')]
plt.legend(handles=legendpatch_list, loc=(0.6, 0.2), fontsize=15)
plt.xlabel('Bat Counts (Hibernation)', fontsize=15)
plt.ylabel('Bat Counts (Roost)', fontsize=15)
#We can see a strong correlation between the roost and hibernation datasets
So how many bats make it out of hibernation? I would have to take individual years of data, so if I look at winter 2009 - 2010 then I would compare that to summer 2010 I could do this for a few examples and then could plot them together.
# Spring cross-over months (March and April), when the bats are coming out of hibernation
cross_months_df = Roost_data.loc[Roost_data['month'].isin([3, 4])]
over_months_df = Hibernation_data.loc[Hibernation_data['month'].isin([3, 4])]
# Stack the roost rows on top of the hibernation rows, keeping the roost frame's columns
crossover_months = [cross_months_df, over_months_df]
crossover_df = pd.concat(crossover_months)
crossover_df = crossover_df[cross_months_df.columns]
crossover_df.head()
crossover_df['commonname'].value_counts()
# Records per species, for plotting
groupeddata4 = crossover_df.groupby(['commonname'])
grouped4 = groupeddata4['commonname'].count()
groupeddata4.size()
# Bar chart of the spring cross-over records
grouped4.plot.bar(figsize=(20, 10))
plt.title('Total crossover Bats Spring', fontsize=20, color='Blue')
plt.xlabel('Bat Name')
plt.ylabel('Total Count')
# Same cross-over selection for months 8-12 (the block plotted as "Autumn" further down)
cross_monthsaut_df = Roost_data.loc[Roost_data['month'].isin([8, 9, 10, 11, 12])]
over_monthsaut_df = Hibernation_data.loc[Hibernation_data['month'].isin([8, 9, 10, 11, 12])]
# Stack the roost rows on top of the hibernation rows, keeping the roost frame's columns
crossover_months_autumn = [cross_monthsaut_df, over_monthsaut_df]
crossover_aut_df = pd.concat(crossover_months_autumn)
crossover_aut_df = crossover_aut_df[cross_monthsaut_df.columns]
crossover_aut_df.head()
# Records per species in the autumn cross-over frame (months 8-12), for plotting
groupeddata10 = crossover_aut_df.groupby(['commonname'])
grouped10 = groupeddata10['commonname'].aggregate('count')
groupeddata10.size()
# Bar chart of the autumn cross-over records; the title previously said "Spring",
# copied from the spring plot, although grouped10 holds the autumn months.
grouped10.plot(kind='bar', figsize=(20, 10))
plt.title('Total crossover Bats Autumn', fontsize=20, color='Blue')
plt.xlabel('Bat Name')
plt.ylabel('Total Count')
# Spring cross-over counts (bars) against autumn cross-over counts (line) on twinned axes
fig = plt.figure(figsize=(20, 10))
# Single subplot: a 1 x 1 grid, first cell
Axes1 = fig.add_subplot(1, 1, 1)
grouped4.plot.bar(ax=Axes1, ylim=[0, 3000], color='blue', fontsize=15)
Axes1.set_ylabel('Extra Spring Counts', fontsize=20)
# Second axes drawn over the same area, sharing the x-axis with Axes1 (twinx)
OAxes1 = Axes1.twinx()
grouped10.plot(kind='line', ax=OAxes1, ylim=[0, 3000], color='red', fontsize=15)
OAxes1.set_ylabel('Extra Autumn Counts', fontsize=20)
# Patch objects act as legend handles, one entry per colour
import matplotlib.patches as mpatches
legendpatch_list = [mpatches.Patch(color='blue', label='Extra Spring Counts'),
                    mpatches.Patch(color='red', label='Extra Autumn Counts')]
# loc is given as fractions of the axes; an x above 1 puts the legend outside the plot
plt.legend(handles=legendpatch_list, loc=(1.1, 0.7), fontsize=20)
plt.title('Extra Spring and Autumn Counts Compared', fontsize=20)
#Here we start to see a real difference in counts which takes me to my Question 2 which is comparing the numbers of hibernating
#bats with the numbers of roosting bats.
# Line plot of the autumn cross-over records per species
grouped10.plot.line(figsize=(20, 10))
plt.title('Total crossover Bats Autumn', fontsize=20, color='Blue')
plt.xlabel('Bat Name')
plt.ylabel('Total Count')
# For comparison: January and February, when according to the Bat Conservation Trust
# most of the counting takes place
hib_months_df = Hibernation_data.loc[Hibernation_data['month'].isin([1, 2])]
ros_months_df = Roost_data.loc[Roost_data['month'].isin([1, 2])]
# Stack the hibernation rows on top of the roost rows, keeping the hibernation frame's columns
test_months = [hib_months_df, ros_months_df]
study_df = pd.concat(test_months)
study_df = study_df[hib_months_df.columns]
study_df.head()
# Records per species in the study months
groupeddata5 = study_df.groupby(['commonname'])
grouped5 = groupeddata5['commonname'].count()
groupeddata5.size()
# Distinct species names in order of first appearance. pd.unique (already used elsewhere
# in this notebook) replaces the hand-written "append if not seen" loop, which re-scanned
# the list for every row.
names = list(pd.unique(study_df.commonname))
names
# Four-colour palette cycled to one entry per species in grouped5
from itertools import cycle, islice
my_colors = list(islice(
    cycle(['dodgerblue', 'cornflowerblue', 'skyblue', 'slateblue']),
    len(grouped5)))
# Bar chart of the study-month (Jan-Feb) records per species
grouped5.plot.bar(figsize=(18, 12), color=my_colors)
plt.title('Total hibernation study Bats', fontsize=25, color='darkblue')
plt.xlabel('Bat Name', fontsize=15)
plt.ylabel('Total Count', fontsize=15)
It would appear that the cross-over months have small values in comparison to the hibernation and roost months, and this would be explained by the study periods. This would suggest that most of the counts are taking place in the correct months. The erroneous values from each dataset have been consolidated into the months as described in the study details on the Bat Conservation Trust website, and these combined and cleaned datasets will be used to attempt to answer the question of how the bats are surviving the hibernation process. If I took this study further, it would be to carry out analysis on how the weather patterns affect this process and whether the numbers or types of bats (or both) were affected by particular weather conditions. It could well be similar to birds, which may start producing babies too early if late winter is warm; then bad weather hits again and the babies don't survive. If this was to be studied, I would have to look at the numbers counted coming out of hibernation the next year, to see if the babies had survived the previous year and were in the counts for this year.
2010
For this analysis: I will select individual years of data, so if I look at winter 2010 then I would compare that to summer 2010 I could do this for a few examples and then could plot them together. This should provide an indication of the patterns of bat numbers of the hibernating counts and the roost counts.
# Stack the 2010 hibernation rows (One_year) on top of the 2010 roost rows,
# keeping the hibernation frame's columns
totals2010 = [One_year, RoostSelectYear_df]
totals2010_df = pd.concat(totals2010)
totals2010_df = totals2010_df[One_year.columns]
totals2010_df.head()
# Keep species and year only, then count rows per species
totals2010_df = totals2010_df[['commonname', 'year']]
Combined10 = totals2010_df.pivot_table(index=['commonname'], aggfunc='count')
# The counted 'year' column holds the number of records, so call it 'counts'
Combined10 = Combined10.rename(columns={'year': 'counts'})
Combined10.columns
# Pie chart of the share of 2010 records per species.
# Palette cycled to one entry per species in grouped5
my_colors = list(islice(cycle(['dodgerblue', 'cornflowerblue', 'skyblue', 'slateblue','#191970', '#001CF0', '#0038E2',
                               '#0055D4', '#0071C6', '#008DB8', '#00AAAA',
                               '#00C69C', '#00E28E', '#00FF80']), None, len(grouped5)))
# Wedge labels are blanked (species are shown in the legend instead). One empty label is
# generated per row rather than a hard-coded list, so the plot keeps working when the
# number of species changes.
Combined10.plot.pie(y='counts', autopct='%.2f', labels=[''] * len(Combined10),
                    figsize=(14, 18), colors=my_colors, fontsize=16)
plt.axis('equal')
plt.ylabel('')
plt.legend(labels=Combined10.index, loc=(0.8, 0.82), fontsize=14)
plt.title('Total Bats 2010', fontsize=20, color='black')
plt.show()
One_year, RoostSelectYear_df gives one year (2010) of data to compare
# 2010 hibernation records per species, plus summary statistics of those counts
hibyear = One_year.groupby(['commonname'])
hibyeargroup = hibyear['commonname'].count()
hibyear.size()
stats = hibyeargroup.describe()
stats
# 2010 roost records per species, plus summary statistics of those counts
roostyear = RoostSelectYear_df.groupby(['commonname'])
roostyeargroup = roostyear['commonname'].count()
roostyear.size()
stats1 = roostyeargroup.describe()
stats1
Correlate
# 2010 hibernation counts (bars) against 2010 roost counts (line) on twinned axes
fig = plt.figure(figsize=(20, 10))
# Single subplot: a 1 x 1 grid, first cell
Axes = fig.add_subplot(1, 1, 1)
hibyeargroup.plot.bar(ax=Axes, ylim=[0, 500], color='blue', fontsize=15)
Axes.set_ylabel('2010 Hibernation Counts', fontsize=20)
# Second axes drawn over the same area, sharing the x-axis with Axes (twinx)
OAxes = Axes.twinx()
roostyeargroup.plot(kind='line', ax=OAxes, ylim=[0, 500], color='red', fontsize=15)
OAxes.set_ylabel('2010 Roost Counts', fontsize=20)
# Patch objects act as legend handles, one entry per colour
import matplotlib.patches as mpatches
legendpatch_list = [mpatches.Patch(color='blue', label='2010 Hibernation Counts'),
                    mpatches.Patch(color='red', label='2010 Roost Counts')]
# loc is given as fractions of the axes; an x above 1 puts the legend outside the plot
plt.legend(handles=legendpatch_list, loc=(1.1, 0.7), fontsize=20)
plt.title('2010 Hibernation and Roost counts compared', fontsize=20)
#Here we start to see a real difference in counts which takes me to my Question 2 which is comparing the numbers of hibernating
#bats with the numbers of roosting bats.
It would seem that because there are different bats studied that this question would have to be answered with a specific study of particular bats. I can correlate the numbers but would have to take into account that these are different species of bat that I am comparing. However it may be that it is difficult to tell anyway and that the classifications made in the data collections have errors. Therefore taking the total count and comparing it may produce a guide of sorts.
# Silence pandas' chained-assignment warning (the default setting is 'warn')
pd.set_option('mode.chained_assignment', None)
# 2010 hibernation records per species
counting = One_year['commonname'].value_counts()
len(counting)
# 2010 roost records per species
counting1 = RoostSelectYear_df['commonname'].value_counts()
len(counting1)
# Roost minus hibernation, matched on species by pandas index alignment
diff = counting1 - counting
Percentages rise in bat counts 1999-2013 and difference between roost and hibernation. Look at the bats of concern and try a histogram. Remember the map to illustrate. Find how to label points on a scatter plot. I chose the roost counts because I felt they would be most closely matched to the hibernation survey "As a general rule, trends calculated from the Field and Waterway Survey are considered to be most robust,followed by the Hibernation Survey and then the Roost Counts." The grey long-eared bat is one of the rarest bats in the UK, with a population estimated at 1,000 individuals and a distribution that is restricted mainly to the southern coast of England and Wales. Identify and monitor maternity roosts and hibernation sites is one of the ways of monitoring this species.
# Drop the 'Noctule Bat' rows (by index label) so that the counts can be compared
noctule_index = One_year[One_year['commonname'] == 'Noctule Bat'].index
One_year = One_year.drop(noctule_index)
pd.unique(One_year['commonname'])
# Hibernation counts as a one-column frame
new_hib = counting.to_frame()
# Scatter of 2010 hibernation (x) against roost (y) counts with a least-squares trend line.
# NOTE(review): counting and counting1 come from value_counts(), so each is sorted by its
# own counts; scatter/polyfit pair them by position, not by species - confirm this is intended.
fig = plt.figure(figsize=(12, 10))
plt.scatter(counting, counting1, color=['red', 'blue'], s=60)
axes = plt.gca()
# Slope and intercept of the first-degree fit
m, b = np.polyfit(counting, counting1, 1)
# Draw the fitted line across the current x-range of the axes
X_plot = np.linspace(axes.get_xlim()[0], axes.get_xlim()[1], 100)
plt.plot(X_plot, m*X_plot + b, '-', color='black')
plt.title('Correlation of Roost and Hibernation Counts 2010', fontsize=15, color='Black')
legendpatch_list = [mpatches.Patch(color='red', label='Roost Counts'),
                    mpatches.Patch(color='blue', label='Hibernation Counts')]
plt.legend(handles=legendpatch_list, loc=(0.6, 0.2), fontsize=12)
plt.xlabel('Bat Counts (Hibernation)', fontsize=15)
plt.ylabel('Bat Counts (Roost)', fontsize=15)
#We can see a strong correlation between the roost and hibernation datasets
# value_counts() orders each Series by its own frequencies and pearsonr pairs values
# purely by position, so the two Series are first matched on species (their index);
# only species present in both are kept.
counting_by_species, counting1_by_species = counting.align(counting1, join='inner')
scipy.stats.pearsonr(counting_by_species, counting1_by_species)
This shows good correlation which would suggest that overall the bats survived the winter well, however this also takes into account new babies born, but it shows overall the survival rate was good. This one looks better than the one earlier in investigations.
# Distinct species names in the combined data
names = pd.unique(Combined_df['commonname'])
names
# Work on a copy and remove the bats that are not in the main studies by JNCC.
# Rows are dropped by index label, all excluded species in one pass.
StudyBats = SelectYears_df.copy()
not_in_study = StudyBats['commonname'].isin([
    'Greater Horseshoe Bat',
    'Whiskered Bat',
    "Brandt's Bat",
    "Whiskered/Brandt's Bat",
    'Grey Long-eared Bat',
    "Bechstein's Bat",
])
StudyBats = StudyBats.drop(StudyBats[not_in_study].index)
pd.unique(StudyBats['commonname'])
# Records per year for the study species, used to calculate the percentage change
StudyBats = StudyBats[['commonname','year']]
pivot = StudyBats.pivot_table(index=['year'], aggfunc='count')
# The pivot's index is 'year' only; 'commonname' is the counted column, not an index
# level, so naming it as a level makes reset_index raise KeyError on current pandas.
# A plain reset_index() moves 'year' back into a column.
pivot.reset_index(inplace=True)
pivot.columns
# Bar chart of the yearly counts, to compare with the JNCC study
pivot.plot(kind = 'bar',x='year', y='commonname', figsize = (14,10),color='lightblue',ylim=[0,1000])
plt.title('Number of bats from the C8 Mammals Study Groups', fontsize=20, color='black')
plt.show()
#http://jncc.defra.gov.uk/page-4271
The results look to be around a 50% increase in numbers from 1999 to 2013 but the percentage calculations will show whether this is correct.
Not a good visualisation
# Records per year across all species, used to calculate the percentage change
SelectYears_df = SelectYears_df[['commonname','year']]
pivoted = SelectYears_df.pivot_table(index=['year'], aggfunc='count')
# The pivot's index is 'year' only; 'commonname' is the counted column, not an index
# level, so naming it as a level makes reset_index raise KeyError on current pandas.
# A plain reset_index() moves 'year' back into a column.
pivoted.reset_index(inplace=True)
# The counted 'commonname' column holds the number of records, so call it 'counts'
pivoted = pivoted.rename(columns = {'commonname' : 'counts'})
pivoted.columns
# One colour per species, looked up from a single mapping; species without an entry
# are 'white'. 'Noctule Bat' used to be assigned twice ('blue', then 'green'); only the
# last assignment took effect, so only 'green' is kept.
species_colours = {
    'Soprano Pipistrelle': 'dodgerblue',
    'Pipistrelle': '#191970',
    'Common Pipistrelle': 'slateblue',
    'Brown Long-eared Bat': '#00FF80',
    "Brandt's Bat": 'red',
    "Bechstein's Bat": 'cyan',
    'Greater Horseshoe Bat': '#00C69C',
    'Serotine': '#00AAAA',
    "Daubenton's Bat": '#008DB8',
    "Whiskered/Brandt's Bat": 'yellow',
    'Grey Long-eared Bat': 'grey',
    'Lesser Horseshoe Bat': '#00E28E',
    'Noctule Bat': 'green',
    "Natterer's Bat": 'black',
    'Whiskered Bat': '#0038E2',
}
whole_merged['Colour'] = whole_merged['commonname'].map(species_colours).fillna('white')
whole_merged.head()
# Yearly record counts as a scatter plot; marker size is driven by the count itself
pivoted.plot(kind='scatter', x='year', y='counts', figsize=(14, 10),
             s=pivoted['counts'], color=(0, 0, 1), ylim=[0, 1000])
# Show the years as plain numbers (no offset notation), one tick per year
plt.ticklabel_format(useOffset=False, style='plain')
plt.xticks(pivoted['year'])
plt.title('Number of bats each year', fontsize=20, color='black')
This represents the overall bat counts for the period 1999 - 2013. I calculated the percentage change each year and the overall percentage change below.
# Records per species in the combined data, for plotting
Combined = Combined_df[['commonname', 'year']]
dpivoted = Combined.pivot_table(index=['commonname'], aggfunc='count')
# The counted 'year' column holds the number of records, so call it 'counts'
dpivoted = dpivoted.rename(columns={'year': 'counts'})
dpivoted.columns
dpivoted
The headline measure is a composite index of eight bat species: serotine, Daubenton's bat, Natterer’s bat, noctule, common pipistrelle, soprano pipistrelle, brown long-eared bat, and lesser horseshoe bat. http://jncc.defra.gov.uk/page-4271
# Pie chart of the share of records per species in the combined data.
# Palette cycled to one entry per species in grouped5
my_colors = list(islice(cycle(['dodgerblue', 'cornflowerblue', 'skyblue', 'slateblue','#191970', '#001CF0', '#0038E2',
                               '#0055D4', '#0071C6', '#008DB8', '#00AAAA',
                               '#00C69C', '#00E28E', '#00FF80']), None, len(grouped5)))
# Wedge labels are blanked (species are shown in the legend instead). One empty label is
# generated per row rather than a hard-coded list, so the plot keeps working when the
# number of species changes.
dpivoted.plot.pie(y='counts', autopct='%.2f', labels=[''] * len(dpivoted),
                  figsize=(14, 18), colors=my_colors, fontsize=16)
plt.axis('equal')
plt.ylabel('')
plt.legend(labels=dpivoted.index, loc=(0.8, 0.82), fontsize=14)
plt.title('Total Bats 1999 - 2013', fontsize=20, color='black')
plt.show()
# Row-wise sum of grpComb stored as a 'totals' column
grpComb['totals'] = grpComb.sum(axis=1)
# Bar chart of the totals for each year between 1999 and 2013
grpComb['totals'].plot.bar()
print(grpComb['totals'])
Percentages
Calculating the annual percentages to determine if the changes represent a similar pattern to the Defra report described above: C8. Mammals of the wider countryside (bats) http://jncc.defra.gov.uk/page-4271
def percentIncrease(x, y):
    """Return the percentage change from x to y, relative to x.

    Parameters
    ----------
    x : number
        The starting (baseline) value.
    y : number
        The value being compared against the baseline.

    Returns
    -------
    number
        ``((y - x) / x) * 100``. The result is positive when y is larger
        than x and negative when it is smaller.

    Raises
    ------
    ZeroDivisionError
        If x is a plain number equal to zero, because the change is
        expressed relative to x.
    """
    z = ((y - x) / x) * 100
    return z
# Change between each pair of consecutive yearly totals; x is the earlier
# total and y the later one.
x, y = 2222, 2226
percentIncrease(x, y)
x, y = 2226, 1932
percentIncrease(x, y)
x, y = 1932, 2554
percentIncrease(x, y)
x, y = 2554, 2552
percentIncrease(x, y)
x, y = 2552, 2760
percentIncrease(x, y)
x, y = 2760, 2776
percentIncrease(x, y)
x, y = 2776, 2878
percentIncrease(x, y)
x, y = 2878, 2854
percentIncrease(x, y)
x, y = 2854, 3410
percentIncrease(x, y)
x, y = 3410, 3598
percentIncrease(x, y)
x, y = 3598, 3852
percentIncrease(x, y)
x, y = 3852, 3578
percentIncrease(x, y)
x, y = 3578, 3140
percentIncrease(x, y)
x, y = 3140, 3500
percentIncrease(x, y)
# Overall change from the first total to the last one.
x, y = 2222, 3500
percentIncrease(x, y)
Adding the annual percentage changes to a dataframe to plot.
# Yearly totals, in order, exactly as passed to percentIncrease above.
yearly_totals = [2222, 2226, 1932, 2554, 2552, 2760, 2776, 2878, 2854,
                 3410, 3598, 3852, 3578, 3140, 3500]

# Derive each change from the totals using the same formula as
# percentIncrease: (new - old) / old * 100. Hand-typed values are easy to get
# wrong - dividing by the later total, or dropping the minus sign, makes a
# fall in numbers plot as a rise. Here a fall stays negative.
# 100.0 keeps the arithmetic in floating point.
percentChange = {
    'Year': [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
             2010, 2011, 2012, 2013],
    'Change': [100.0 * (new - old) / old
               for old, new in zip(yearly_totals, yearly_totals[1:])],
}
df = pd.DataFrame(percentChange)
# Plot the 'Change' column against 'Year' (DataFrame.plot draws a line by default).
df.plot(x='Year', y='Change')
This plot does not reveal useful information. It shows that there were some drops around 2003 - 2007, but that the trends seem to have stabilised. I found that my data showed a percentage change of 57.5% over the period 1999 to 2013 inclusive, compared to the JNCC report, which showed an overall 23% increase. However, the data they have used is complete and has actual counts of individual bats rather than just sightings. The patterns and trends, however, are similar, and the overall story of bats in the UK over the period is consistent in patterns if not percentages.
Weather information used as a guide
The counts for 2001 are lower than those for 1999 and 2000 but this is likely to be due to severe winter weather as reported on the Met Office website at: http://www.metoffice.gov.uk/climate/uk/summaries/2001/february February 2001
*The averaging period used for the following assessment was 1961-1990.
*UK overview
*Wet weather returned to eastern parts of England at the start. As high pressure began to dominate it became drier with sunshine by day and frost at night. Turned cold towards the end with quite significant snowfalls in Scotland and northern England.
*In Scotland, heavy snow at the start and end of the month brought power cuts and disrupted road, rail and air transport.
This would mean that it is still acceptable to treat the period as 1999 - 2013
2010 http://www.metoffice.gov.uk/climate/uk/summaries/2011/winter Winter 2010/11
The following represents an assessment of the weather experienced across the UK during winter 2010 / 2011 (December 2010 to February 2011) and how it compares with the 1981 to 2010 averages.
Mean temperatures over the UK were 4.8 °C below average during December, 0.6 °C below average in January and 1.7 °C above average in February. The UK mean temperature for the winter as a whole was 2.4 °C, making it less cold than winter 2009/10 which was 1.6 °C but still the second-coldest winter since 1985/86 with 2.3 °C. Over Scotland and Northern Ireland it was the second-coldest winter since 1985/86 and 1978/79 respectively, with again only last winter having been colder. Over Northern Ireland it was equal sixth-coldest winter in the series from 1910. Over England and Wales it was the second-coldest since 1995/96, with only last winter having been colder.
December was exceptionally cold across the UK; the coldest December in over 100 years, with the highest number of air frosts in at least the last 50 years. It was the coldest calendar month since February 1986, and in Northern Ireland the coldest calendar month of the last 100 years. Chilly conditions persisted in early January, before a milder spell around mid-month then a return to colder weather. Mean temperatures finished somewhat below normal. Temperatures were generally above average in February, making it the mildest February since 2002. The number of air frosts was the fourth lowest in February in the last 50 years.
Precipitation amounts over the UK were well below average during both December (40% of average) and January (84%) but above in February (129%). In December, there were widespread snowfalls in the first week and from mid-month until Christmas. It was the driest December over the UK since 1963 and the third driest in the 100-year series. February was particularly wet in parts of northern England and southern Scotland, where it was one of the wettest Februarys in the last 100 years.
There was a fall in numbers in 2012 and the Met Office has this for that year:
http://www.metoffice.gov.uk/climate/uk/summaries/2013/winter
*Winter 2012/13 The following represents an assessment of the weather experienced across the UK during Winter 2012/2013 (December 2012 to February 2013) and how it compares with the 1981 to 2010 average.
*The mean temperature over the UK for winter was 3.3 °C which is 0.4 °C below the long term average. December was equal to the long term average for the month, January was 0.3 °C below, February was 0.9 °C below and at 2.8 °C was the coldest month of the season. Spells of notably mild weather occurred in late December and early January, and notably cold weather in early December, mid to late January, and the latter part of February.
*Winter overall for the UK was marginally wetter than the long term average with 106%, although much of Highland Scotland was drier than average. It was the wettest December since 1999 with 149% of long term average rain; considerable disruption from flooding events occurred in the run-up to Christmas. January was slightly drier than average for the UK as a whole (91%), with a few localised exceptions in some coastal areas and Northern Ireland. February was also dry (68%). There was a period of widespread snowfall across much of the country from mid to late January as frontal systems hit colder air, causing considerable disruption. This was followed by a rapid thaw in the last few days of the month; snowmelt and further rain resulting in some further localised flooding. Further snow events in February were generally short-lived.
This would suggest that the wet weather may be unsuitable for bats and that the colder weather is better for them. Perhaps this makes them better survivors than competitors and they perhaps fare better in colder weather meaning that animals who don't fare so well are not there to fight for food. This would mean more was available for bats.
grpComb.columns
# Keep only the per-species columns, in this fixed order, for the area plot.
species_columns = [
    "Bechstein's Bat",
    "Brandt's Bat",
    'Brown Long-eared Bat',
    'Common Pipistrelle',
    "Daubenton's Bat",
    'Greater Horseshoe Bat',
    'Grey Long-eared Bat',
    'Lesser Horseshoe Bat',
    "Natterer's Bat",
    'Noctule Bat',
    'Pipistrelle',
    'Serotine',
    'Soprano Pipistrelle',
    'Whiskered Bat',
    "Whiskered/Brandt's Bat",
]
grpComb = grpComb[species_columns]
# Stacked area chart with one band per species column.
grpComb.plot.area(figsize=(15, 15));
# Work on a copy so that grpComb itself is left untouched.
newgrp = grpComb.copy()
# DataFrame.drop returns a new frame rather than modifying newgrp, so the
# result has to be assigned back or the column is never removed.
# errors='ignore' keeps this working when grpComb has no 'totals' column.
newgrp = newgrp.drop('totals', axis=1, errors='ignore')
newgrp.columns
figsize = (15, 15)
# Write the latitude, longitude and species name of every record to a CSV
# file, without the index column.
map_columns = ['latitude', 'longitude', 'commonname']
MapBats = Combined_df[map_columns]
MapBats.to_csv('data/combinedBats.csv', index=False)
Combined_df.columns
# Plain Python list of the values in newgrp's 'year' column.
year = list(newgrp.year)
import matplotlib.patches as mpatches
# The legend patches are collected in this list, one per key, starting empty.
legendpatch_list = []
# Colour used for each name; at present every name maps to the same colour.
colours = {
    'Soprano Pipistrelle': 'red',
    'Common Pipistrelle': 'red',
    "Brandt's Bat": 'red',
    'Western Barbastelle': 'red',
    'Pipistrelle': 'red',
    'Brown Long-eared Bat': 'red',
    "Daubenton's Bat": 'red',
    'Bat': 'red',
    'Serotine': 'red',
    'Greater Horseshoe Bat': 'red',
    'Grey Long-eared Bat': 'red',
    'Long-eared Bat species': 'red',
    'Lesser Horseshoe Bat': 'red',
    "Bechstein's Bat": 'red',
    "Whiskered/Brandt's Bat": 'red',
    "Natterer's Bat": 'red',
    'Mouse-eared Bat': 'red',
    'Lesser Noctule': 'red',
    'Whiskered Bat': 'red',
    'Myotis Bat species': 'red',
    'Noctule Bat': 'red',
}
# Build one legend patch per key, coloured from the lookup above.
# NOTE(review): iterating a DataFrame yields its column labels, so every
# column label of `pivoted` must be a key of `colours` - confirm that
# `pivoted` is the intended thing to loop over here.
for key in pivoted:
    legendpatch_list.append(mpatches.Patch(color=colours[key], label=key))