diff --git a/src/main/java/org/parisjug/eventpublisher/eventpage/HtmlEventPage.java b/src/main/java/org/parisjug/eventpublisher/eventpage/HtmlEventPage.java
index 0b5587c..86b3852 100644
--- a/src/main/java/org/parisjug/eventpublisher/eventpage/HtmlEventPage.java
+++ b/src/main/java/org/parisjug/eventpublisher/eventpage/HtmlEventPage.java
@@ -1,5 +1,8 @@
package org.parisjug.eventpublisher.eventpage;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.time.format.DateTimeFormatter.ISO_INSTANT;
+
import java.io.File;
import java.io.IOException;
import java.net.URLEncoder;
@@ -10,19 +13,22 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class HtmlEventPage implements EventPage {
+ private static final String ABSOLUTE_URL = "https://www.parisjug.org";
+
Document doc;
@Override
public String getTitle() {
- Elements titleElements = doc.select(".post__title");
- if (titleElements.isEmpty()) {
+ Element titleElement = this.doc.selectFirst(".post__title");
+ if (titleElement == null) {
throw new EventPageCheckException(
"The page should contain an element with the id \"title\". For instance:
Quarkus World Tour
.");
}
- return titleElements.first().text();
+ return titleElement.text();
}
protected void loadFromLocalHtmlFile(File htmlFile) {
@@ -43,70 +49,58 @@ protected void loadFromUrl(String url) {
@Override
public String getDetails() {
- return getPart1() + getBuffet() + getPart2();
+ return convertToAbsoluteLinks(getPart1() + getBuffet() + getPart2());
}
public String getBuffet() {
- Elements buffet = doc.select("#buffet");
- if (buffet.isEmpty()) {
- Elements detailh3 = doc.select("#détails");
- if(detailh3.isEmpty()) {
- return "";
- }
- String buffethtml = "";
- Elements elements = detailh3.parents().first().children();
- // for each element, in elements.stream() start at h3 with id contains buffet and append html until next h3
- boolean start = false;
- for(int i = 0; i < elements.size(); i++) {
- if(elements.get(i).tagName().equals("h3") && elements.get(i).id().contains("buffet")) {
- start = true;
- }
- if(elements.get(i).tagName().equals("h3") && !elements.get(i).id().contains("buffet")) {
- start = false;
- continue;
- }
- if(start) {
- buffethtml += elements.get(i).outerHtml();
- }
- }
- return buffethtml.replaceAll("href=\"/", "href=\"https://www.parisjug.org/");
+ Element buffet = doc.selectFirst("#buffet");
+ if (buffet != null) {
+ return buffet.html();
}
- return buffet.first().html().replaceAll("href=\"/", "href=\"https://www.parisjug.org/");
+ Element buffetElement = doc.selectFirst("h3[id*='buffet']");
+ if (buffetElement == null) {
+ return "";
+ }
+ String buffetHtml = buffetElement.outerHtml();
+ while ((buffetElement = buffetElement.nextElementSibling()) != null &&
+ !"h3".equals(buffetElement.tagName())) {
+ buffetHtml+= buffetElement.outerHtml();
+ }
+ return buffetHtml;
}
@Override
public String getPart1() {
- Elements part1 = doc.select("#part1");
- if(part1.isEmpty()) {
- Elements detailh3 = doc.select("#détails");
- if(detailh3.isEmpty()) {
- return "";
+ Element part1 = doc.selectFirst("#part1");
+ if (part1 != null) {
+ return part1.html();
+ }
+ Elements detailh3 = doc.select("#détails");
+ if(detailh3.isEmpty()) {
+ return "";
+ }
+ String part1html = "";
+ Elements elements = detailh3.parents().first().children();
+ // for each element, in elements.stream() start at h3 with id détail and append html until next h3
+ boolean start = false;
+ for(int i = 0; i < elements.size(); i++) {
+ if(elements.get(i).tagName().equals("h2") && elements.get(i).id().equals("détails")) {
+ start = true;
+ continue;
}
- String part1html = "";
- Elements elements = detailh3.parents().first().children();
- // for each element, in elements.stream() start at h3 with id détail and append html until next h3
- boolean start = false;
- for(int i = 0; i < elements.size(); i++) {
- if(elements.get(i).tagName().equals("h2") && elements.get(i).id().equals("détails")) {
- start = true;
- continue;
- }
- if(elements.get(i).tagName().equals("h2") && !elements.get(i).id().equals("détails")) {
- start = false;
- continue;
- }
- if(elements.get(i).tagName().equals("h3") && elements.get(i).id().contains("buffet")) {
- start = false;
- continue;
- }
- if(start) {
- part1html += elements.get(i).outerHtml();
- }
+ if(elements.get(i).tagName().equals("h2") && !elements.get(i).id().equals("détails")) {
+ start = false;
+ continue;
+ }
+ if(elements.get(i).tagName().equals("h3") && elements.get(i).id().contains("buffet")) {
+ start = false;
+ continue;
+ }
+ if(start) {
+ part1html += elements.get(i).outerHtml();
}
- return part1html.replaceAll("href=\"/", "href=\"https://www.parisjug.org/");
-
}
- return part1.first().html().replaceAll("href=\"/", "href=\"https://www.parisjug.org/");
+ return part1html;
}
@Override
@@ -138,30 +132,25 @@ public String getPart2() {
part2html += elements.get(i).outerHtml();
}
}
- return part2html.replaceAll("href=\"/", "href=\"https://www.parisjug.org/");
+ return part2html;
}
- return part2.first().html().replaceAll("href=\"/", "href=\"https://www.parisjug.org/");
+ return part2.first().html();
}
@Override
public String getDateTime() {
- Elements dateTimeElement = doc.select("#datetime");
- if (dateTimeElement.isEmpty()) {
- // in the section starting with h2 id="date-et-lieu", get the first ul li element
- Elements elements = doc.select("#date-et-lieu").parents().first().children();
- for(int i = 0; i < elements.size(); i++) {
- if(elements.get(i).tagName().equals("ul")) {
- Elements lis = elements.get(i).children();
- for(int j = 0; j < lis.size(); j++) {
- if(lis.get(j).tagName().equals("li")) {
- return lis.get(j).text();
- }
- }
- }
- }
-
+ Element dateTimeElement = doc.selectFirst("#datetime");
+ if (dateTimeElement != null) {
+ return dateTimeElement.text();
+ }
+ // in the section starting with h2 id="date-et-lieu", get the first ul li element
+ Element dateEtLieuElement = doc.selectFirst("#date-et-lieu + ul > li");
+ if (dateEtLieuElement == null) {
+ throw new EventPageCheckException(
+ "The page should contain an element with the id \"date-et-lieu\"."
+ );
}
- return dateTimeElement.first().text();
+ return dateEtLieuElement.text();
}
@Override
@@ -170,7 +159,7 @@ public String getStartTime() {
if (isVirtual()) {
eventDateTime = eventDateTime.minusMinutes(15);
}
- return eventDateTime.format(DateTimeFormatter.ISO_INSTANT).replace(":", "").replace("-", "");
+ return eventDateTime.format(ISO_INSTANT).replace(":", "").replace("-", "");
}
private ZonedDateTime getEventZonedDateTime() {
@@ -185,7 +174,7 @@ public String getEndTime() {
} else {
eventDateTime = eventDateTime.plusMinutes(180);
}
- return eventDateTime.format(DateTimeFormatter.ISO_INSTANT).replace(":", "").replace("-", "");
+ return eventDateTime.format(ISO_INSTANT).replace(":", "").replace("-", "");
}
ZonedDateTime parseDateTime(String datetimeInput) {
@@ -204,23 +193,19 @@ public String getLongTitle() {
@Override
public String getLocation() {
- Elements locationElement = doc.select("#location a");
- if (locationElement.isEmpty()) {
+ Element locationElement = doc.selectFirst("#location a");
+ if (locationElement == null) {
// in the section starting with h2 id="date-et-lieu", get the second li element
- Elements elements = doc.select("#date-et-lieu").parents().first().getElementsByTag("li");
- if(elements.size() > 1) {
- locationElement = elements.get(1).getElementsByTag("a");
- }
- else {
- return "";
- }
+ locationElement = doc.selectFirst("#date-et-lieu + ul > li:eq(1) > a");
}
-
- String attr = locationElement.first().attr("href");
- if (attr.startsWith("/")) {
- attr = "https://www.parisjug.org" + attr;
+ if (locationElement == null) {
+ return "";
+ }
+ String location = locationElement.attr("href");
+ if (location.startsWith("/")) {
+ location = ABSOLUTE_URL + location;
}
- return attr;
+ return location;
}
@Override
@@ -234,16 +219,16 @@ public String generateGcalLink() {
}
String encode(String str) {
- return URLEncoder.encode(str, StandardCharsets.UTF_8);
+ return URLEncoder.encode(str, UTF_8);
}
@Override
public String getIntro() {
- Elements intro = doc.select("#intro");
- if (intro.isEmpty()) {
+ Element intro = this.doc.selectFirst("#intro");
+ if (intro == null) {
return "";
}
- return intro.first().html().replaceAll("href=\"/", "href=\"https://www.parisjug.org/");
+ return convertToAbsoluteLinks(intro.html());
}
@Override
@@ -251,4 +236,7 @@ public boolean isVirtual() {
return getTitle().contains("Soirée Virtuelle");
}
+ private static String convertToAbsoluteLinks(String html) {
+ return html.replaceAll("href=\"/", "href=\"" + ABSOLUTE_URL + "/");
+ }
}
diff --git a/src/test/java/org/parisjug/eventpublisher/eventpage/EventPage20231212Test.java b/src/test/java/org/parisjug/eventpublisher/eventpage/EventPage20231212Test.java
index 885370e..293663b 100644
--- a/src/test/java/org/parisjug/eventpublisher/eventpage/EventPage20231212Test.java
+++ b/src/test/java/org/parisjug/eventpublisher/eventpage/EventPage20231212Test.java
@@ -24,7 +24,7 @@ public void test_inRealLifeEventPage() {
// title
assertEquals("Soirée Panama (GL)", page.getTitle(), "Title from page");
- // assert page.getPart1 should contains "19h30 : Panama - Foreign Function &
+ // assert page.getPart1 should contain "19h30 : Panama - Foreign Function &
// Memory"
assertTrue(page.getPart1().contains("19h30 : Panama - Foreign Function"),
"part1 should contains 19h30 : Panama - Foreign Function");