# In [ ]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Base address of the result report; the event code is appended directly to it.
urlHead = 'https://www.run2pix.com/report/report_w.php?EventCode='

# One entry per edition, newest first. Each entry holds two race descriptors
# ('MA' and 'HM'), and each descriptor is
# [EventCode value, Race value, sn value, number of result pages].
eventCode = [
    [['20171217', 'MA', '161', 12], ['20171217', 'HM', '162', 32]],
    [['20161218', 'MA', '136', 12], ['20161218', 'HM', '137', 28]],
    [['20151220', 'MA', '111', 10], ['20151220', 'HM', '112', 20]],
    [['20141221', 'MA', '86', 11], ['20141221', 'HM', '87', 34]],
    [['20131215', 'MA', '57', 10], ['20131215', 'HM', '58', 31]],
]

# Year -> that year's pair of race descriptors (the same list objects that
# eventCode holds), pairing 2017..2013 with the entries above in order.
yearCheck = dict(zip(range(2017, 2012, -1), eventCode))

# Maximum number of result rows read from a single page.
pageRunnerNum = 500

def checkTime(x):
    """Carry a minutes field of 60 into the hours field of an "H:M:S" string.

    A value such as "03:60:12" is rewritten to "04:00:12". Any string whose
    minutes field is not 60 is returned unchanged.

    Args:
        x: Time string made of three colon-separated integer fields.

    Returns:
        The corrected string when the minutes field equals 60, otherwise
        ``x`` itself.

    Raises:
        ValueError: If ``x`` does not split into exactly three integer fields.
    """
    fields = x.split(":")
    # Unpacking validates both the field count and that each field is numeric.
    hours, minutes, _ = map(int, fields)
    if minutes != 60:
        return x
    # zfill pads single-digit hours to two digits without prepending an extra
    # zero once the carried hour reaches 10 ('0' + str(h) produced "010").
    # The seconds text is passed through exactly as received.
    return str(hours + 1).zfill(2) + ":00:" + fields[2]

def secondSelect(gender):
    """Build the output row for the runner currently being processed.

    Reads the module-level names ``runner``, ``y``, ``case`` and
    ``yearCheck``. The category is derived from the characters that follow
    the first character of ``runner[3]``.

    Args:
        gender: Gender label stored in the row.

    Returns:
        ``[y, yearCheck[y][case][1], runner[5], runner[4], runner[7],
        gender, category]``.
    """
    group = runner[3]
    second = group[1]
    pair = group[1:3]

    # The checks are order-sensitive: the first match decides the category.
    if second == u'\u570b':
        category = 'Invited'
    elif pair == '19':
        category = 'Under 20'
    elif second in ('6', '7'):
        category = '60+'
    elif pair == u'\u5b50\u7d44':
        category = 'HM'
    elif second == u'\u8996':
        category = 'Visually Impaired'
    else:
        # No special marker: keep up to five characters of the group text.
        category = group[1:6]

    return [y, yearCheck[y][case][1], runner[5], runner[4], runner[7], gender, category]
                                                                                                     
def groupSelect(s):
    """Return the output row for the current runner, tagged with a gender.

    Args:
        s: Leading character of the runner's group text. The character
            U+5973 selects 'F'; any other value selects 'M'.

    Returns:
        The row built by ``secondSelect`` for the selected gender label.
    """
    gender = 'F' if s == u'\u5973' else 'M'
    # Hand back secondSelect's own result. It used to be discarded in favour
    # of the module-level ``runnerInfo``, which is undefined on the first call
    # and otherwise still holds the previous runner's row.
    return secondSelect(gender)
	
# Rows collected from every year, race and page, in the order they are scraped.
multiRunner_info = []
finishTime = []

# Index of the first result row inside the results table; earlier rows are
# skipped (presumably header/layout rows -- confirm against the live page).
firstRunnerRow = 11

# NOTE: ``y``, ``case`` and ``runner`` must keep these names at module level
# because secondSelect() reads them as globals.
for y in range(2013, 2018):
    for case in range(0, 2):
        eventDate, raceCode, serial, pageCount = yearCheck[y][case]
        for k in range(0, pageCount):
            url = (urlHead + eventDate + "&Race=" + raceCode
                   + "&sn=" + serial + "&pagenum=" + str(k + 1))
            # A timeout keeps one stalled connection from hanging the whole
            # run; raise_for_status stops an HTTP error page being parsed.
            page = requests.get(url, timeout=30)
            page.raise_for_status()
            soup = BeautifulSoup(page.content, 'html.parser')

            # Locate the rows once per page instead of searching the whole
            # document again for every runner. Slicing also ends the loop
            # cleanly when the page holds fewer rows than pageRunnerNum.
            rows = soup.find_all('table')[1].find_all('tr')
            for runnerTable in rows[firstRunnerRow:firstRunnerRow + pageRunnerNum]:
                # A row with fewer than 4 children is treated as the end of
                # this page's results.
                if len(runnerTable) < 4:
                    break
                runner = [td.get_text(strip=True) for td in runnerTable.find_all('td')]
                # Cells 4 and 7 are the two time columns written to the output.
                runner[4] = checkTime(runner[4])
                runner[7] = checkTime(runner[7])
                # runner[3][0] is the leading (gender) character of the group text.
                runnerInfo = groupSelect(runner[3][0])
                # Append rather than insert at k*500+i: that index is relative
                # to one event, so on the shared list it pushed later events
                # in front of or into earlier ones.
                multiRunner_info.append(runnerInfo)
			

# Column labels, in the same order as the values of each collected row.
outputColumns = ['Year', 'Full_half', 'Rank', 'Official_Time', 'Net_Time', 'Gender', 'Catagory']
df = pd.DataFrame(multiRunner_info, columns=outputColumns)

# 'utf-8-sig' prefixes the file with a UTF-8 byte-order mark.
df.to_csv('allTPE_Runner2013_2017.csv', encoding='utf-8-sig')