import datetime
import math

import matplotlib.pyplot as plt
import pandas as pd
%pylab inline
def time_ticks(x, pos):
    """Format a tick value given in seconds as an ``h:mm:ss`` label.

    The ``(x, pos)`` signature is the one expected by matplotlib's
    ``FuncFormatter``; ``pos`` is accepted but not used.

    Args:
        x: Tick value in seconds.
        pos: Tick position supplied by the formatter (ignored).

    Returns:
        ``str(datetime.timedelta(seconds=abs(x)))``, prefixed with ``'-'``
        when ``x`` is negative.
    """
    # str(timedelta) renders negative durations as "-1 day, 23:55:00".
    # Format the magnitude and add the sign ourselves, the same way
    # seconds_to_timestring does.
    sign = '-' if x < 0 else ''
    return sign + str(datetime.timedelta(seconds=abs(x)))
def timestring_to_seconds(time_string):
    """Convert an ``h:mm:ss`` string into a whole number of seconds.

    Args:
        time_string: Text with at least three ``:``-separated integer
            fields (hours, minutes, seconds). Fields after the third are
            ignored.

    Returns:
        ``hours*3600 + minutes*60 + seconds`` as an ``int``, or a float NaN
        when ``time_string`` is not a ``str`` (for example a missing value
        handed over by ``Series.apply``).

    Raises:
        IndexError: If the string has fewer than three fields.
        ValueError: If one of the first three fields is not an integer.
    """
    # Missing entries do not arrive as strings; map them to NaN instead of
    # failing on ``.split``.
    if not isinstance(time_string, str):
        return float('nan')
    fields = time_string.split(':')
    hours, minutes, seconds = (int(field) for field in fields[:3])
    return hours * 3600 + minutes * 60 + seconds
def seconds_to_timestring(time_seconds):
    """Convert a number of seconds into an ``h:mm:ss.s`` string.

    Args:
        time_seconds: Duration in seconds (int or float); may be negative
            or NaN.

    Returns:
        NaN when ``time_seconds`` is NaN. Otherwise a string with unpadded
        hours, two-digit minutes and seconds shown to one decimal place,
        prefixed with ``'-'`` when ``time_seconds`` is negative.
    """
    if math.isnan(time_seconds):
        return math.nan
    # Work with the absolute value to avoid modulo on negative numbers, and
    # round to the displayed precision *before* splitting into fields so a
    # value such as 59.97 carries into the minutes ("0:01:00.0") instead of
    # being printed as "0:00:60.0".
    magnitude = round(abs(time_seconds), 1)
    hours, remainder = divmod(magnitude, 3600)
    minutes, seconds = divmod(remainder, 60)
    output = "{0}:{1:02d}:{2:04.1f}".format(int(hours), int(minutes), seconds)
    if time_seconds < 0:
        output = '-' + output
    return output
def finish_time_histogram(data, color=None, bin_edges=None):
    """Draw a histogram of ``data.finish_seconds`` on a new 12x8 figure.

    The x axis gets a major tick every 300 units, labelled through
    ``time_ticks`` and rotated vertically.

    Args:
        data: Object exposing a ``finish_seconds`` attribute holding the
            values to histogram.
        color: Passed straight through to ``hist``.
        bin_edges: Bin specification passed to ``hist``. When omitted, the
            module-level ``bins`` value is used, so ``bins`` must be defined
            before the call.

    Returns:
        Whatever ``Axes.hist`` returns for the plotted data.
    """
    if bin_edges is None:
        # Keep the original behaviour of reading the global bin list.
        bin_edges = bins
    _, ax = plt.subplots(figsize=(12, 8))
    ax.xaxis.set_major_formatter(plt.FuncFormatter(time_ticks))
    ax.xaxis.set_major_locator(plt.MultipleLocator(300))
    ax.set_title('Taipei Marathon Finishers in 2013~2017', color='b', size=30)
    ax.yaxis.set_label_text('Number of finishers')
    plt.xticks(rotation='vertical')
    # Plot on the axes created above rather than on the implicit current axes.
    return ax.hist(data.finish_seconds, bins=bin_edges, color=color)
# Load the runner records.
data = pd.read_csv('allTPE_Runner2013_2017.csv')

# Keep only the rows whose Full_half is 'MA' (presumably the full marathon;
# confirm against the data). Take an explicit copy so that adding a column
# below modifies an independent frame and does not raise pandas'
# SettingWithCopyWarning.
mData = data[data.Full_half == 'MA'].copy()
mData['finish_seconds'] = mData['Net_Time'].apply(timestring_to_seconds)

# Count Gender values per (Year, Full_half) group, one column per Gender value,
# then divide each row by its total to get shares.
male_female = data.groupby(['Year', 'Full_half']).Gender.value_counts().unstack()
male_female_normed = male_female.div(male_female.sum(1), axis=0)
ax = male_female_normed.plot(kind='barh', stacked=True, fontsize=16, title='Male/Female Ratio')
ax.xaxis.set_major_locator(plt.MultipleLocator(0.1))
ax.invert_yaxis()

# Show raw data
male_female_normed.applymap('{:.2%}'.format)

# One-minute bin edges from 7200 s (2:00:00) to 21600 s (6:00:00).
bins = [7200 + 60*i for i in range(0, 4*60 + 1)]
plot = finish_time_histogram(mData)

# plot[2] is the third element of the hist result (the drawn bars). Bar i
# covers [7200 + 60*i, 7200 + 60*(i + 1)) seconds, so these indices are the
# last minute before 4:00, 4:30, 5:00 (red) and 5:30 (yellow).
for bar_index, bar_colour in ((119, 'r'), (149, 'r'), (179, 'r'), (209, 'y')):
    plot[2][bar_index].set_facecolor(bar_colour)