import requests
from bs4 import BeautifulSoup
#Starting at the full results page, collect the <a> tags for the yearly World Championship results.
#Each tag carries the year ("data-y") and the link to that year's results page ("href").
#requests applies no timeout by default, so one is set to keep the script from hanging on an
#unresponsive server; raise_for_status() stops early on an HTTP error instead of parsing an error page.
r_cyrs = requests.get("https://ibjjf.com/events/results", timeout=30)
r_cyrs.raise_for_status()
c_cyrs = r_cyrs.content
soup_yrs = BeautifulSoup(c_cyrs, "html.parser")
year_results = soup_yrs.find_all("a", {"data-n": "World Jiu-Jitsu IBJJF Championship"})
print(year_results)
[<a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="1996" href="https://ibjjf.com/events/results/1996-world-jiu-jitsu-ibjjf-championship" target="_blank">1996</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="1997" href="https://ibjjf.com/events/results/1997-world-jiu-jitsu-ibjjf-championship" target="_blank">1997</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="1998" href="https://ibjjf.com/events/results/1998-world-jiu-jitsu-ibjjf-championship" target="_blank">1998</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="1999" href="https://ibjjf.com/events/results/1999-world-jiu-jitsu-ibjjf-championship" target="_blank">1999</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2000" href="https://ibjjf.com/events/results/2000-world-jiu-jitsu-ibjjf-championship" target="_blank">2000</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2001" href="https://ibjjf.com/events/results/2001-world-jiu-jitsu-ibjjf-championship" target="_blank">2001</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2002" href="https://ibjjf.com/events/results/2002-world-jiu-jitsu-ibjjf-championship" target="_blank">2002</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2003" href="https://ibjjf.com/events/results/2003-world-jiu-jitsu-ibjjf-championship" target="_blank">2003</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2004" href="https://ibjjf.com/events/results/2004-world-jiu-jitsu-ibjjf-championship" target="_blank">2004</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2005" href="https://ibjjf.com/events/results/2005-world-jiu-jitsu-ibjjf-championship" target="_blank">2005</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF 
Championship" data-y="2006" href="https://ibjjf.com/events/results/2006-world-jiu-jitsu-ibjjf-championship" target="_blank">2006</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2007" href="https://ibjjf.com/events/results/2007-world-jiu-jitsu-ibjjf-championship" target="_blank">2007</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2008" href="https://ibjjf.com/events/results/2008-world-jiu-jitsu-ibjjf-championship" target="_blank">2008</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2009" href="https://ibjjf.com/events/results/2009-world-jiu-jitsu-ibjjf-championship" target="_blank">2009</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2010" href="https://ibjjf.com/events/results/2010-world-jiu-jitsu-ibjjf-championship" target="_blank">2010</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2011" href="https://ibjjf.com/events/results/2011-world-jiu-jitsu-ibjjf-championship" target="_blank">2011</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2012" href="https://www.ibjjfdb.com/ChampionshipResults/114/PublicResults" target="_blank">2012</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2013" href="https://www.ibjjfdb.com/ChampionshipResults/177/PublicResults" target="_blank">2013</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2014" href="https://www.ibjjfdb.com/ChampionshipResults/272/PublicResults" target="_blank">2014</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2015" href="https://www.ibjjfdb.com/ChampionshipResults/415/PublicResults" target="_blank">2015</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2016" href="https://www.ibjjfdb.com/ChampionshipResults/535/PublicResults" 
target="_blank">2016</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2017" href="https://www.ibjjfdb.com/ChampionshipResults/730/PublicResults" target="_blank">2017</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2018" href="https://www.ibjjfdb.com/ChampionshipResults/926/PublicResults" target="_blank">2018</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2019" href="https://www.ibjjfdb.com/ChampionshipResults/1209/PublicResults" target="_blank">2019</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2021" href="https://www.ibjjfdb.com/ChampionshipResults/1776/PublicResults" target="_blank">2021</a>, <a class="event-year-result" data-n="World Jiu-Jitsu IBJJF Championship" data-y="2022" href="https://www.ibjjfdb.com/ChampionshipResults/1877/PublicResults" target="_blank">2022</a>]
#Build a list of dictionaries, one per championship year, each holding the year and the link to its results page
ly = [
    {'Year': anchor['data-y'], 'results_link': anchor['href']}
    for anchor in year_results
]
#Create an empty list to add the results from the loop
l = []


def _text_or_none(tag):
    """Return the stripped text of a BeautifulSoup lookup result, or None if nothing was found."""
    if tag is None:
        return None
    return tag.text.strip()


for i in ly:
    #create soup of results for each championship year
    #(requests has no default timeout, so set one to avoid hanging on a single slow page)
    r = requests.get(i.get("results_link"), timeout=30)
    c = r.content
    lp_soup = BeautifulSoup(c, "html.parser")
    #1996 to 2011 results
    early_yr_rslts = lp_soup.find_all("td", {"class": "name"})
    #2012 to 2022 results (different html file setup than earlier years)
    later_yr_rslts = lp_soup.find_all("div", {"class": "list-item"})
    for item in early_yr_rslts:
        #create an empty dictionary that will be added to the list, which ultimately gets added to df
        d = {}
        #add the year, category, rank, team and points data for each team.
        #Category, rank and points may be absent on a page; a lookup that finds nothing is stored as None.
        d['Year'] = i.get("Year")
        d['Category'] = _text_or_none(item.find_previous("div", {"class": "category mt-4 mb-3"}))
        d['Rank'] = _text_or_none(item.find_previous_sibling("td", {"class": "place"}))
        d['Team'] = item.text.strip()
        d['Points'] = _text_or_none(item.find_next("td", {"class": "points"}))
        l.append(d)
    for item in later_yr_rslts:
        #Create an empty dictionary that will be added to the list, which ultimately gets added to df
        d = {}
        #list the year, category, rank, team and points data for each team.
        #These lookups are not guarded: a missing element raises AttributeError.
        d['Year'] = i.get("Year")
        d['Category'] = item.parent.find_previous_sibling("h4", {"class": "subtitle"}).text
        d['Rank'] = item.find("div", {"class": "position"}).text.strip()
        d['Team'] = item.find("div", {"class": "name"}).text.strip()
        d['Points'] = item.find("div", {"class": "points"}).text.strip()
        l.append(d)
l;
import pandas as pd
#One row per (year, category, team) result
df = pd.DataFrame(l)
df;
The raw data has duplicate category names for some years (in both English and Portuguese), and some category names carry too much detail (they include the year and the tournament name).
#Remove 2011 results in English, which don't have the points. Portuguese results include points, so we'll convert those later.
#NOTE(review): the rows are picked by position (222 through 230), so this depends on the scrape returning rows in the same order - confirm after re-scraping.
english_2011_rows = df.index[222:231]
df.drop(index=english_2011_rows, inplace=True)
df
| Year | Category | Rank | Team | Points | |
|---|---|---|---|---|---|
| 0 | 1997 | Overall | 1 | Nova Uniao | |
| 1 | 1997 | Overall | 2 | Gracie Barra | |
| 2 | 1997 | Overall | 3 | Protesto | |
| 3 | 1998 | Adult Male | 1 | Alliance | |
| 4 | 1998 | Adult Male | 2 | Nova Uniao | |
| ... | ... | ... | ... | ... | ... |
| 526 | 2022 | Juvenile | 6 | CheckMat | 26 |
| 527 | 2022 | Juvenile | 7 | Alliance | 23 |
| 528 | 2022 | Juvenile | 8 | Nova União | 12 |
| 529 | 2022 | Juvenile | 9 | John Frankl Jiu-Jitsu | 12 |
| 530 | 2022 | Juvenile | 10 | Fight for a Kid | 12 |
522 rows × 5 columns
#Translate the Portuguese category labels on the 2011 rows to English.
#Year is still a string at this point, hence the comparison with '2011'.
rows_2011 = df['Year'] == '2011'
for portuguese, english in (
    ('Juvenil', 'Juvenile'),
    ('Feminino', 'Adult Female'),
    ('Masculino', 'Adult Male'),
):
    df.loc[rows_2011 & (df['Category'] == portuguese), 'Category'] = english
df[213:222]; #check results of 2011 are as expected
df['Category'].unique()
array(['Overall', 'Adult Male', 'Adult Female', 'Juvenile', 'Juvenil',
'Feminino', 'Masculino', 'Novice', 'Adult', 'Female',
'Adult Male - World Jiu-Jitsu Championship 2014',
'Adult Female - World Jiu-Jitsu Championship 2014',
'Juvenile - World Jiu-Jitsu Championship 2014',
'Adult Male - Worlds 2015', 'Adult Female - Worlds 2015',
'Juvenile - Worlds 2015', 'Adult Male (Adulto Masculino)',
'Adult Female (Adulto Feminino)', 'Juvenile (Juvenil)',
'Adult Male - World JJ Championship 2017',
'Adult Female - World JJ Championship 2017',
'Juvenile - World JJ Championship 2017'], dtype=object)
#Drop the rows that still carry Portuguese labels, which are duplicates of the English ones
portuguese_labels = ['Juvenil', 'Feminino', 'Masculino']
is_portuguese = df['Category'].isin(portuguese_labels)
indexPortuguese = df.loc[is_portuguese].index
df.drop(index=indexPortuguese, inplace=True)
df
| Year | Category | Rank | Team | Points | |
|---|---|---|---|---|---|
| 0 | 1997 | Overall | 1 | Nova Uniao | |
| 1 | 1997 | Overall | 2 | Gracie Barra | |
| 2 | 1997 | Overall | 3 | Protesto | |
| 3 | 1998 | Adult Male | 1 | Alliance | |
| 4 | 1998 | Adult Male | 2 | Nova Uniao | |
| ... | ... | ... | ... | ... | ... |
| 526 | 2022 | Juvenile | 6 | CheckMat | 26 |
| 527 | 2022 | Juvenile | 7 | Alliance | 23 |
| 528 | 2022 | Juvenile | 8 | Nova União | 12 |
| 529 | 2022 | Juvenile | 9 | John Frankl Jiu-Jitsu | 12 |
| 530 | 2022 | Juvenile | 10 | Fight for a Kid | 12 |
441 rows × 5 columns
#Simplify the category names. *1996 had no official team ranking, 1997 was "Overall" but only had male competitors
#2014 through 2017 category names were very verbose (they embed the tournament name/year or a Portuguese translation)
#The mapping is named category_dict rather than dict so the built-in dict type is not shadowed for the rest of the script
category_dict = {
    'Overall': 'Adult Male',
    'Adult Male': 'Adult Male',
    'Adult Female': 'Adult Female',
    'Juvenile': 'Juvenile',
    'Adult': 'Adult Male',
    'Female': 'Adult Female',
    'Adult Male - World Jiu-Jitsu Championship 2014': 'Adult Male',
    'Adult Female - World Jiu-Jitsu Championship 2014': 'Adult Female',
    'Juvenile - World Jiu-Jitsu Championship 2014': 'Juvenile',
    'Adult Male - Worlds 2015': 'Adult Male',
    'Adult Female - Worlds 2015': 'Adult Female',
    'Juvenile - Worlds 2015': 'Juvenile',
    'Adult Male (Adulto Masculino)': 'Adult Male',
    'Adult Female (Adulto Feminino)': 'Adult Female',
    'Juvenile (Juvenil)': 'Juvenile',
    'Adult Male - World JJ Championship 2017': 'Adult Male',
    'Adult Female - World JJ Championship 2017': 'Adult Female',
    'Juvenile - World JJ Championship 2017': 'Juvenile',
}
#Only the Category column is rewritten; labels missing from the mapping are left unchanged
df.replace({'Category': category_dict}, inplace=True)
df['Category'].unique()
array(['Adult Male', 'Adult Female', 'Juvenile', 'Novice'], dtype=object)
#Cast Rank and Year from string to int.
#Rank: so the plot orders ranks numerically (as strings the order is 1, 10, 2, 3, etc.).
#Year: so we can use min/max methods later.
for numeric_column in ('Rank', 'Year'):
    df[numeric_column] = df[numeric_column].astype(int)
df
| Year | Category | Rank | Team | Points | |
|---|---|---|---|---|---|
| 0 | 1997 | Adult Male | 1 | Nova Uniao | |
| 1 | 1997 | Adult Male | 2 | Gracie Barra | |
| 2 | 1997 | Adult Male | 3 | Protesto | |
| 3 | 1998 | Adult Male | 1 | Alliance | |
| 4 | 1998 | Adult Male | 2 | Nova Uniao | |
| ... | ... | ... | ... | ... | ... |
| 526 | 2022 | Juvenile | 6 | CheckMat | 26 |
| 527 | 2022 | Juvenile | 7 | Alliance | 23 |
| 528 | 2022 | Juvenile | 8 | Nova União | 12 |
| 529 | 2022 | Juvenile | 9 | John Frankl Jiu-Jitsu | 12 |
| 530 | 2022 | Juvenile | 10 | Fight for a Kid | 12 |
441 rows × 5 columns
#Alternative team names, grouped under the single name each one is normalised to
_team_aliases = {
    "Atos Jiu-Jitsu": ("Atos JJ",),
    "Carlson Gracie": ("Carlson Gracie Team", "Carlson Gracie UGF"),
    "CheckMat": ("Checkmat BJJ",),
    "Gracie Barra Pitbull": ("Gracie Barra/Pitbull",),
    "Gracie Humaita": ("Gracie Humaitá",),
    "Infight JJ": ("Infight", "Infight Jiu-Jitsu"),
    "Nova Uniao": ("Nova União",),
    "Cicero Costha Internacional": ("PSLPB Cicero Costha",),
    "Renzo Gracie": ("Renzo Gracie Academy",),
    "Ribeiro Jiu-Jitsu": ("Ribeiro JJ",),
    "Soul Fighters BJJ": ("Soul Fighters",),
    "Zenith BJJ": ("Zenith BJJ - Las Vegas", "Zenith BJJ - Poland"),
}
#Flatten to the alias -> normalised name lookup used for the replacement
team_dict = {
    alias: canonical
    for canonical, aliases in _team_aliases.items()
    for alias in aliases
}
#Normalise the team names in place; names not listed in team_dict are left unchanged
#NOTE(review): the unique() output still lists both 'Lloyd Irvin' and 'Team Lloyd Irvin' - confirm whether these should be merged as well
df['Team'] = df['Team'].replace(team_dict)
df['Team'].unique()
array(['Nova Uniao', 'Gracie Barra', 'Protesto', 'Alliance',
'Carlson Gracie', 'UGF', 'Brigadeiro', 'Gracie Humaita',
'Osvaldo Alves', 'Brazilian Top Team', 'Equipe III', 'N. Cadan',
'Equilíbrio', 'Miesimo', 'Isaias JJ', 'Master', 'Behring',
'Gracie Barra Pitbull', 'TT Jiu-Jitsu', 'Leão Dourado', 'Brasa',
'Team Frontline', 'Pedro Gama Filho', 'Chute Boxe', 'Monteiro',
'Bon Sai', 'Infight JJ', 'Renzo Gracie', 'Rodrigo Vaghi',
'Ryan Gracie', 'Ralph Gracie', 'Godoi JJ Club',
'Soul Fighters BJJ', 'CheckMat', 'Gigante BJJ', 'Lloyd Irvin',
'Ribeiro Jiu-Jitsu', 'Atos Jiu-Jitsu', 'Team Lloyd Irvin',
'Gracie Elite Team', 'Cicero Costha Internacional', 'GF Team',
'CT MR', 'Big Brothers', 'Raul Castillo BJJ',
'Cia Paulista - International', 'American Top Team',
'Brazil 021 School of Jiu-Jitsu', 'Zenith BJJ',
'Integração / EOFC', 'Equipe Black Belt Team',
'Alliance International', 'Bruno Bastos Association',
'Purebred Jiu Jitsu Guam', 'Start Brazilian Jiu-Jitsu',
'Campeões Esperança Jiu-Jitsu Team', 'Brasa CTA',
'Marcio Cruz BJJ', 'Associação Oriente', 'Lótus Club',
'Tokai Brazilian Jiu-Jitsu', 'Ns Brotherhood',
'Academia Day Bu Kan - DBK', 'Guigo BJJ',
'Maromba Academia International', 'American Jiu-Jitsu Syndicate',
'David Jacobs Jiu-Jitsu Team', 'Unity Jiu-jitsu',
'Jiu-Jitsu For Life Team', 'Fight Sports', 'Ares BJJ', 'LEAD BJJ',
'Gracie Humaita Reunion', 'R1NG BJJ', 'Rodrigo Pinheiro BJJ',
'Brazilian Fight Factory', 'Art of Jiu Jitsu', 'Dream Art',
'Cardonas BJJ', 'Six Blades Jiu-Jitsu',
'Qatar BJJ / Vision Brasil', 'John Frankl Jiu-Jitsu',
'Fight for a Kid'], dtype=object)
#Save the cleaned results to disk
df.to_csv("IBJJF_Worlds_Results.csv")
#Split into one dataframe per category for charting: df1 = men, df2 = women, df3 = juvenile
df1, df2, df3 = (
    df[df['Category'] == category]
    for category in ('Adult Male', 'Adult Female', 'Juvenile')
)
import altair as alt
#Legend-bound selection shared by all three charts: the selected team(s) stay opaque, the rest are faded
#NOTE(review): selection_multi/add_selection are the Altair 4 API; Altair 5 deprecates them in favour of
#selection_point/add_params - kept as-is to match the version this notebook was written against.
selection = alt.selection_multi(fields=['Team'], bind='legend')


def _team_rank_chart(data, title):
    """Return the interactive rank-by-year line chart for one category.

    Args:
        data: DataFrame with integer Year and Rank columns plus Team and
            Points columns (df1, df2 or df3).
        title: Title shown above the chart.

    Returns:
        The configured Altair chart with the shared legend selection attached.
    """
    #Years spanning the data's range, used as impute keys so a team gets a null Rank
    #for years it has no result (presumably so its line breaks there - verify rendering)
    year_keyvals = alt.ImputeSequence(start=data['Year'].min(), stop=data['Year'].max())
    return alt.Chart(data).mark_line(point=True).encode(
        x=alt.X("Year:O", axis=alt.Axis(labelAngle=-45)),
        #sort="descending" puts rank 1 at the top; the domain pads ranks 1-10 by half a step
        y=alt.Y(
            "Rank",
            impute=alt.ImputeParams(value=None, keyvals=year_keyvals),
            sort="descending",
            scale=alt.Scale(domain=[0.5, 10.5]),
        ),
        #symbolLimit=0 allows unlimited legend entries (the default cuts the team list short)
        color=alt.Color(
            "Team:N",
            scale=alt.Scale(scheme='tableau20'),
            legend=alt.Legend(symbolLimit=0, columns=2),
        ),
        tooltip=["Year", "Team", "Rank", "Points"],
        opacity=alt.condition(selection, alt.value(1), alt.value(0.1)),
    ).properties(
        title=title,
        width=700,
        height=400,
    ).configure_title(
        fontSize=18,
    ).add_selection(
        selection,
    ).interactive(bind_y=False)


chart1 = _team_rank_chart(df1, "IBJJF Worlds - Men's Team Results 1997 to 2022")
chart1
chart2 = _team_rank_chart(df2, "IBJJF Worlds - Women's Team Results 1998 to 2022")
chart2
chart3 = _team_rank_chart(df3, "IBJJF Worlds - Juvenile Team Results 1998 to 2022")
chart3