com.ftpix.sherdogparser.parsers.EventParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sherdog-parser Show documentation
Show all versions of sherdog-parser Show documentation
Library that parse sherdog website to provide java MMA database
package com.ftpix.sherdogparser.parsers;
import com.ftpix.sherdogparser.Constants;
import com.ftpix.sherdogparser.models.Event;
import com.ftpix.sherdogparser.models.Fight;
import com.ftpix.sherdogparser.models.FightResult;
import com.ftpix.sherdogparser.models.SherdogBaseObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.text.ParseException;
import java.time.ZoneId;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.List;
/**
* Created by gz on 20-Aug-16.
* Class to parse an event by using its sherdog url
*/
public class EventParser implements SherdogParser {
private final int FIGHTER1_COLUMN = 1, FIGHTER2_COLUMN = 3, METHOD_COLUMN = 4, ROUND_COLUMN = 5, TIME_COLUMN = 6;
private final Logger logger = LoggerFactory.getLogger(EventParser.class);
private final ZoneId ZONE_ID;
/**
* Setting a zoneId will convert the dates to the desired zone id
*/
public EventParser(ZoneId zoneId) {
this.ZONE_ID = zoneId;
}
/**
* Creates an event parser with the default zone id
*/
public EventParser() {
this.ZONE_ID = ZoneId.systemDefault();
}
/**
* Parse a sherdog page
*
* @param url of the sherdog page
* @throws IOException if connecting to sherdog fails
* @throws ParseException if the page structure has changed
*/
@Override
public Event parse(String url) throws IOException, ParseException {
Event event = new Event();
event.setSherdogUrl(url);
Document doc = Jsoup.connect(url).timeout(Constants.PARSING_TIMEOUT).get();
//getting name
Elements name = doc.select(".header .section_title h1 span[itemprop=\"name\"]");
event.setName(name.html().replace("
", " - "));
Elements date = doc.select(".authors_info .date meta[itemprop=\"startDate\"]");
//TODO: get date to proper format
try {
event.setDate(ParserUtils.getDateFromStringToZoneId(date.first().attr("content"), ZONE_ID));
} catch (DateTimeParseException e) {
logger.error("Couldn't parse date", e);
}
getFights(doc, event);
Element org = doc.select(".header .section_title h2 a").get(0);
SherdogBaseObject organization = new SherdogBaseObject();
organization.setSherdogUrl(org.attr("abs:href"));
organization.setName(org.select("span[itemprop=\"name\"").get(0).html());
event.setOrganization(organization);
return event;
}
/**
* Gets the fight of the event
*
* @param doc the jsoup HTML document
* @param event The current event
*/
private void getFights(Document doc, Event event) {
logger.info("Getting fights for event #{}[{}]", event.getSherdogUrl(), event.getName());
SherdogBaseObject sEvent = new SherdogBaseObject();
sEvent.setName(event.getName());
sEvent.setSherdogUrl(event.getSherdogUrl());
List fights = new ArrayList<>();
//Checking on main event
Elements mainFightElement = doc.select(".content.event");
Elements fighters = mainFightElement.select("h3 a");
//First fighter
SherdogBaseObject mainFighter1 = new SherdogBaseObject();
Element mainFighter1Element = fighters.get(0);
mainFighter1.setSherdogUrl(mainFighter1Element.attr("abs:href"));
mainFighter1.setName(mainFighter1Element.select("span[itemprop=\"name\"]").html());
//second fighter
SherdogBaseObject mainFighter2 = new SherdogBaseObject();
Element mainFighter2Element = fighters.get(1);
mainFighter2.setSherdogUrl(mainFighter2Element.attr("abs:href"));
mainFighter2.setName(mainFighter2Element.select("span[itemprop=\"name\"]").html());
Fight mainFight = new Fight();
mainFight.setEvent(sEvent);
mainFight.setFighter1(mainFighter1);
mainFight.setFighter2(mainFighter2);
mainFight.setResult(ParserUtils.getFightResult(mainFightElement.first()));
//getting method
Elements mainTd = mainFightElement.select("td");
if (mainTd.size() > 0) {
mainFight.setWinMethod(mainTd.get(1).html().replaceAll("", "").trim());
mainFight.setWinRound(Integer.parseInt(mainTd.get(3).html().replaceAll("", "").trim()));
mainFight.setWinTime(mainTd.get(4).html().replaceAll("", "").trim());
}
mainFight.setDate(event.getDate());
fights.add(mainFight);
logger.info("Fight added: {}", mainFight);
//Checking on card results
logger.info("Found {} fights", fights.size());
Elements tds = doc.select(".event_match table tr");
fights.addAll(parseEventFights(tds, event));
event.setFights(fights);
}
/**
* Parse fights of an old event
*/
private List parseEventFights(Elements trs, Event event) {
SherdogBaseObject sEvent = new SherdogBaseObject();
sEvent.setName(event.getName());
sEvent.setSherdogUrl(event.getSherdogUrl());
List fights = new ArrayList<>();
trs.remove(0);
trs.forEach(tr -> {
Fight fight = new Fight();
fight.setEvent(sEvent);
fight.setDate(event.getDate());
Elements tds = tr.select("td");
fight.setFighter1(getFighter(tds.get(FIGHTER1_COLUMN)));
fight.setFighter2(getFighter(tds.get(FIGHTER2_COLUMN)));
//parsing old fight, we can get the result
if (tds.size() == 7) {
fight.setResult(getResult(tds.get(FIGHTER1_COLUMN)));
fight.setWinMethod(getMethod(tds.get(METHOD_COLUMN)));
fight.setWinRound(getRound(tds.get(ROUND_COLUMN)));
fight.setWinTime(getTime(tds.get(TIME_COLUMN)));
}
fights.add(fight);
logger.info("Fight added: {}", fight);
});
return fights;
}
/**
* Get a fighter
* @param td element from sherdog's table
* @return return a sherdogbaseobject with the fighter name and url
*/
private SherdogBaseObject getFighter(Element td) {
Elements name1 = td.select("span[itemprop=\"name\"]");
String name = name1.get(0).html();
String url = td.select("a[itemprop=\"url\"]").get(0).attr("abs:href");
SherdogBaseObject fighter = new SherdogBaseObject();
fighter.setSherdogUrl(url);
fighter.setName(name);
return fighter;
}
/**
* get the time at which teh fight finished
* @param td element from sherdog's table
* @return get the time of the event
*/
private String getTime(Element td) {
return td.html();
}
/**
* get the round at which the even finished
* @param td element from sherdog's table
* @return the round number
*/
private int getRound(Element td) {
return Integer.parseInt(td.html());
}
/**
*
* @param td element from sherdog's table
* @return get the win method
*/
private String getMethod(Element td) {
return td.html().replaceAll("
(.*)", "");
}
/**
* get the result of the fight
* @param td element from sherdog's table
* @return a rightresult enum
*/
private FightResult getResult(Element td) {
return ParserUtils.getFightResult(td);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy