From e28231d799e587c1e89e6716964ce06404e361a9 Mon Sep 17 00:00:00 2001 From: pidoubleyou <22942659+pidoubleyou@users.noreply.github.com> Date: Tue, 4 Nov 2025 22:07:04 +0100 Subject: [PATCH 1/5] new arte crwaler --- .../java/de/mediathekview/mlib/Const.java | 4 + .../java/mServer/crawler/FilmeSuchen.java | 13 +- .../ArteCategoryFilmListDeserializer.java | 46 --- .../sender/arte/ArteCategoryFilmsDTO.java | 37 -- .../arte/ArteCollectionChildDeserializer.java | 42 --- .../ArteCollectionParentDeserializer.java | 41 --- .../crawler/sender/arte/ArteConstants.java | 10 + .../crawler/sender/arte/ArteCrawler.java | 113 ++++++ .../crawler/sender/arte/ArteCrawler_EN.java | 17 + .../crawler/sender/arte/ArteCrawler_ES.java | 17 + .../crawler/sender/arte/ArteCrawler_FR.java | 17 + .../crawler/sender/arte/ArteCrawler_IT.java | 17 + .../crawler/sender/arte/ArteCrawler_PL.java | 16 + .../arte/ArteDatenFilmDeserializer.java | 51 --- .../crawler/sender/arte/ArteHttpClient.java | 116 ------ .../ArteJsonObjectToDatenFilmCallable.java | 56 --- .../crawler/sender/arte/ArteLanguage.java | 21 ++ .../sender/arte/ArteListBaseDeserializer.java | 64 ---- .../ArteProgramIdToDatenFilmCallable.java | 142 -------- ...pper.java => ArteRestVideoTypeMapper.java} | 41 ++- .../sender/arte/ArteSubPageDeserializer.java | 28 -- .../crawler/sender/arte/ArteVideoDTO.java | 70 ---- .../sender/arte/ArteVideoDeserializer.java | 89 ----- .../sender/arte/ArteVideoDetailsDTO.java | 60 ---- .../arte/ArteVideoDetailsDeserializer.java | 331 ------------------ .../crawler/sender/arte/MediathekArte.java | 330 ----------------- .../sender/arte/json/ArteSubtitleLinkDto.java | 55 +++ .../arte/json/ArteVideoInfoDeserializer.java | 153 ++++++++ .../sender/arte/json/ArteVideoInfoDto.java | 198 +++++++++++ .../arte/json/ArteVideoLinkDeserializer.java | 62 ++++ .../sender/arte/json/ArteVideoLinkDto.java | 60 ++++ .../arte/tasks/ArteDtoVideo2FilmTask.java | 185 ++++++++++ .../sender/arte/tasks/ArteVideoInfoTask.java | 97 +++++ 
.../sender/arte/tasks/ArteVideoLinkTask.java | 65 ++++ .../crawler/sender/base/JsoupConnection.java | 68 ++++ .../ArteCategoryFilmListDeserializerTest.java | 55 --- .../arte/ArteSubPageDeserializerTest.java | 55 --- .../ArteVideoDetailsDeserializerTest.java | 74 ---- 38 files changed, 1212 insertions(+), 1704 deletions(-) delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializer.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmsDTO.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteCollectionChildDeserializer.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteCollectionParentDeserializer.java create mode 100644 src/main/java/mServer/crawler/sender/arte/ArteConstants.java create mode 100644 src/main/java/mServer/crawler/sender/arte/ArteCrawler.java create mode 100644 src/main/java/mServer/crawler/sender/arte/ArteCrawler_EN.java create mode 100644 src/main/java/mServer/crawler/sender/arte/ArteCrawler_ES.java create mode 100644 src/main/java/mServer/crawler/sender/arte/ArteCrawler_FR.java create mode 100644 src/main/java/mServer/crawler/sender/arte/ArteCrawler_IT.java create mode 100644 src/main/java/mServer/crawler/sender/arte/ArteCrawler_PL.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteDatenFilmDeserializer.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteHttpClient.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteJsonObjectToDatenFilmCallable.java create mode 100644 src/main/java/mServer/crawler/sender/arte/ArteLanguage.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteListBaseDeserializer.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteProgramIdToDatenFilmCallable.java rename src/main/java/mServer/crawler/sender/arte/{ArteVideoTypeMapper.java => ArteRestVideoTypeMapper.java} (79%) delete mode 100644 
src/main/java/mServer/crawler/sender/arte/ArteSubPageDeserializer.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteVideoDTO.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteVideoDeserializer.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteVideoDetailsDTO.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/ArteVideoDetailsDeserializer.java delete mode 100644 src/main/java/mServer/crawler/sender/arte/MediathekArte.java create mode 100644 src/main/java/mServer/crawler/sender/arte/json/ArteSubtitleLinkDto.java create mode 100644 src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDeserializer.java create mode 100644 src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDto.java create mode 100644 src/main/java/mServer/crawler/sender/arte/json/ArteVideoLinkDeserializer.java create mode 100644 src/main/java/mServer/crawler/sender/arte/json/ArteVideoLinkDto.java create mode 100644 src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java create mode 100644 src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoInfoTask.java create mode 100644 src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoLinkTask.java delete mode 100644 src/test/developTest/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializerTest.java delete mode 100644 src/test/developTest/java/mServer/crawler/sender/arte/ArteSubPageDeserializerTest.java delete mode 100644 src/test/developTest/java/mServer/crawler/sender/arte/ArteVideoDetailsDeserializerTest.java diff --git a/src/main/java/de/mediathekview/mlib/Const.java b/src/main/java/de/mediathekview/mlib/Const.java index cb3a3b7cd..d32cb16f0 100644 --- a/src/main/java/de/mediathekview/mlib/Const.java +++ b/src/main/java/de/mediathekview/mlib/Const.java @@ -44,7 +44,11 @@ public class Const { public static final String ARD = "ARD"; public static final String ARD_ALPHA = "ARD-alpha"; public static final String ARTE_DE = "ARTE.DE"; + 
public static final String ARTE_EN = "ARTE.EN"; + public static final String ARTE_ES = "ARTE.ES"; public static final String ARTE_FR = "ARTE.FR"; + public static final String ARTE_IT = "ARTE.IT"; + public static final String ARTE_PL = "ARTE.PL"; public static final String BR = "BR"; public static final String DW = "DW"; public static final String HR = "HR"; diff --git a/src/main/java/mServer/crawler/FilmeSuchen.java b/src/main/java/mServer/crawler/FilmeSuchen.java index b7b821bb7..6d97652ab 100644 --- a/src/main/java/mServer/crawler/FilmeSuchen.java +++ b/src/main/java/mServer/crawler/FilmeSuchen.java @@ -27,7 +27,7 @@ import de.mediathekview.mlib.tool.Log; import mServer.crawler.sender.*; import mServer.crawler.sender.ard.ArdCrawler; -import mServer.crawler.sender.arte.MediathekArte; +import mServer.crawler.sender.arte.*; import mServer.crawler.sender.dreisat.DreiSatCrawler; import mServer.crawler.sender.dw.DwCrawler; import mServer.crawler.sender.kika.KikaApiCrawler; @@ -88,7 +88,12 @@ public FilmeSuchen() { mediathekListe.add(new ZdfCrawler(this, 0)); } if (crawlerList.contains("ARTE")) { - mediathekListe.add(new MediathekArte(this, 0)); + mediathekListe.add(new ArteCrawler(this, 0)); + mediathekListe.add(new ArteCrawler_FR(this, 0)); + mediathekListe.add(new ArteCrawler_EN(this, 1)); + mediathekListe.add(new ArteCrawler_ES(this, 1)); + mediathekListe.add(new ArteCrawler_PL(this, 1)); + mediathekListe.add(new ArteCrawler_IT(this, 1)); } if (crawlerList.contains("DW")) { mediathekListe.add(new DwCrawler(this, 0)); @@ -97,7 +102,7 @@ public FilmeSuchen() { mediathekListe.add(new KikaApiCrawler(this, 0)); } if (crawlerList.contains("3SAT")) { - mediathekListe.add(new DreiSatCrawler(this, 1)); + mediathekListe.add(new DreiSatCrawler(this, 0)); } if (crawlerList.contains("SR")) { mediathekListe.add(new SrCrawler(this, 1)); @@ -112,7 +117,7 @@ public FilmeSuchen() { mediathekListe.add(new OrfOnCrawler(this, 1)); } if (crawlerList.contains("PHONIX")) { - 
mediathekListe.add(new PhoenixCrawler(this, 1)); + mediathekListe.add(new PhoenixCrawler(this, 0)); } } diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializer.java deleted file mode 100644 index bec62c5c0..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializer.java +++ /dev/null @@ -1,46 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.gson.*; -import de.mediathekview.mlib.tool.Log; - -import java.lang.reflect.Type; -import java.util.Optional; - -/** - * Deserialisiert Ergebnisse der Anfrage den Filmen einer Kategorie. - * Beispiel-URL: - * https://www.arte.tv/guide/api/api/zones/de/web/videos_subcategory_CMG/?page=1&limit=100 - */ -public class ArteCategoryFilmListDeserializer extends ArteListBaseDeserializer implements JsonDeserializer { - - private static final String JSON_ELEMENT_CONTENT = "content"; - private static final String JSON_ELEMENT_VALUE = "value"; - private static final String JSON_ELEMENT_ZONES = "zones"; - - @Override - public ArteCategoryFilmsDTO deserialize(JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) throws JsonParseException { - ArteCategoryFilmsDTO dto = new ArteCategoryFilmsDTO(); - - JsonElement rootElement = aJsonElement; - if(aJsonElement.getAsJsonObject().has(JSON_ELEMENT_VALUE)) { - rootElement = aJsonElement.getAsJsonObject().get(JSON_ELEMENT_VALUE); - } - final JsonElement zoneElement = rootElement.getAsJsonObject().get(JSON_ELEMENT_ZONES); - if (zoneElement == null || zoneElement.isJsonNull() || !zoneElement.isJsonArray()) { - Log.errorLog(12834940, "zones element not found"); - return dto; - } - - for (JsonElement jsonElement : zoneElement.getAsJsonArray()) { - if(jsonElement.getAsJsonObject().has(JSON_ELEMENT_CONTENT)) { - final JsonObject contentObject = jsonElement.getAsJsonObject().get(JSON_ELEMENT_CONTENT).getAsJsonObject(); - 
extractProgramIdFromData(contentObject, dto); - - Optional url = parsePagination(contentObject); - url.ifPresent(dto::setNextPageUrl); - } - } - - return dto; - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmsDTO.java b/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmsDTO.java deleted file mode 100644 index 70b2aeb7c..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmsDTO.java +++ /dev/null @@ -1,37 +0,0 @@ -package mServer.crawler.sender.arte; - -import java.util.HashSet; -import java.util.Set; - -public class ArteCategoryFilmsDTO { - - private final Set programIds = new HashSet<>(); - private final Set collectionIds = new HashSet<>(); - - private String nextPageUrl; - - public void addProgramId(String aProgramId) { - programIds.add(aProgramId); - } - public void addCollection(String aCollectionId) { - collectionIds.add(aCollectionId); - } - - public Set getProgramIds() { - return programIds; - } - public Set getCollectionIds() { - return collectionIds; - } - - public boolean hasNextPage() { - return nextPageUrl != null && !nextPageUrl.isEmpty(); - } - - public String getNextPageUrl() { - return nextPageUrl; - } - public void setNextPageUrl(String url) { - nextPageUrl = url; - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCollectionChildDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteCollectionChildDeserializer.java deleted file mode 100644 index f659d3768..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteCollectionChildDeserializer.java +++ /dev/null @@ -1,42 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.gson.*; -import mServer.crawler.sender.base.JsonUtils; - -import java.lang.reflect.Type; - -public class ArteCollectionChildDeserializer implements JsonDeserializer { - private static final String ATTRIBUTE_PROGRAM_ID = "programId"; - private static final String ELEMENT_PROGRAMS = "programs"; - private static final String 
ELEMENT_VIDEOS = "videos"; - - public ArteCategoryFilmsDTO deserialize( - final JsonElement aJsonElement, - final Type aType, - final JsonDeserializationContext aJsonDeserializationContext) - throws JsonParseException { - final ArteCategoryFilmsDTO result = new ArteCategoryFilmsDTO(); - if (aJsonElement.isJsonObject()) { - final JsonObject mainObj = aJsonElement.getAsJsonObject(); - - if (JsonUtils.checkTreePath(mainObj, ELEMENT_PROGRAMS)) { - final JsonArray programs = mainObj.get(ELEMENT_PROGRAMS).getAsJsonArray(); - programs.forEach( - program -> { - final JsonObject programObject = program.getAsJsonObject(); - if (JsonUtils.checkTreePath(programObject, ELEMENT_VIDEOS)) { - programObject - .get(ELEMENT_VIDEOS) - .getAsJsonArray() - .forEach( - filmElement -> - JsonUtils.getAttributeAsString(filmElement.getAsJsonObject(), ATTRIBUTE_PROGRAM_ID) - .ifPresent(result::addProgramId)); - } - }); - } - } - return result; - } - -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCollectionParentDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteCollectionParentDeserializer.java deleted file mode 100644 index ef5dfc917..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteCollectionParentDeserializer.java +++ /dev/null @@ -1,41 +0,0 @@ -package mServer.crawler.sender.arte; - - -import com.google.gson.*; -import mServer.crawler.sender.base.JsonUtils; - -import java.lang.reflect.Type; -import java.util.Optional; - -public class ArteCollectionParentDeserializer implements JsonDeserializer { - private static final String ATTRIBUTE_KIND = "kind"; - private static final String ATTRIBUTE_PROGRAM_ID = "programId"; - private static final String ELEMENT_PROGRAMS = "programs"; - private static final String ELEMENT_CHILDREN = "children"; - - public ArteCategoryFilmsDTO deserialize(final JsonElement aJsonElement, final Type aType, final JsonDeserializationContext aJsonDeserializationContext) throws JsonParseException { - final ArteCategoryFilmsDTO 
result = new ArteCategoryFilmsDTO(); - if (aJsonElement.isJsonObject()) { - final JsonObject mainObj = aJsonElement.getAsJsonObject(); - - if (JsonUtils.checkTreePath(mainObj, ELEMENT_PROGRAMS)) { - final JsonArray programs = mainObj.get(ELEMENT_PROGRAMS).getAsJsonArray(); - programs.forEach(program -> { - final JsonObject programObject = program.getAsJsonObject(); - if (JsonUtils.checkTreePath(programObject, ELEMENT_CHILDREN)) { - programObject.get(ELEMENT_CHILDREN).getAsJsonArray().forEach(filmElement -> { - final JsonObject filmObject = filmElement.getAsJsonObject(); - final Optional kind = JsonUtils.getAttributeAsString(filmObject, ATTRIBUTE_KIND); - final Optional programId = JsonUtils.getAttributeAsString(filmObject, ATTRIBUTE_PROGRAM_ID); - - if (kind.isPresent() && kind.get().equalsIgnoreCase("TV_SERIES") && programId.isPresent()) { - result.addCollection(programId.get()); - } - }); - } - }); - } - } - return result; - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteConstants.java b/src/main/java/mServer/crawler/sender/arte/ArteConstants.java new file mode 100644 index 000000000..9c21ac9ca --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/ArteConstants.java @@ -0,0 +1,10 @@ +package mServer.crawler.sender.arte; + +public class ArteConstants { + public static final String VIDEOS_URL ="https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=-creationDate&language=%s"; + public static final String VIDEOS_URL_ALT ="https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=creationDate&language=%s"; + public static final String VIDEO_URL ="https://www.arte.tv/hbbtvv2/services/web/index.php/OPA/v3/streams/%s/%s/%s"; //PROGRAMID/KIND/LANG + public static final String API_TOKEN = "Bearer Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA"; + private ArteConstants() {} + +} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java 
b/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java new file mode 100644 index 000000000..7e6f4ab10 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java @@ -0,0 +1,113 @@ +package mServer.crawler.sender.arte; + +import com.google.gson.JsonElement; +import de.mediathekview.mlib.Config; +import de.mediathekview.mlib.Const; +import de.mediathekview.mlib.daten.DatenFilm; +import de.mediathekview.mlib.tool.Log; +import mServer.crawler.CrawlerTool; +import mServer.crawler.FilmeSuchen; +import mServer.crawler.sender.MediathekCrawler; +import mServer.crawler.sender.arte.json.ArteVideoInfoDto; +import mServer.crawler.sender.arte.tasks.ArteDtoVideo2FilmTask; +import mServer.crawler.sender.arte.tasks.ArteVideoInfoTask; +import mServer.crawler.sender.arte.tasks.ArteVideoLinkTask; +import mServer.crawler.sender.base.JsonUtils; +import mServer.crawler.sender.base.JsoupConnection; +import mServer.crawler.sender.base.TopicUrlDTO; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.IOException; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.RecursiveTask; + +public class ArteCrawler extends MediathekCrawler { + private static final Logger LOG = LogManager.getLogger(ArteCrawler.class); + private final JsoupConnection jsoupConnection; + + public ArteCrawler(FilmeSuchen ssearch, int startPrio) { + this(ssearch, startPrio, Const.ARTE_DE); + } + + protected ArteCrawler(FilmeSuchen ssearch, int startPrio, String sender) { + super(ssearch, sender,/* threads */ 1, /* urlWarten */ 200, startPrio); + this.jsoupConnection = new JsoupConnection(60, 4); + } + + protected ArteLanguage getLanguage() { + return ArteLanguage.DE; + } + + @Override + protected RecursiveTask> createCrawlerTask() { + + try { + final ArteVideoInfoTask aArteRestVideoInfoTask; + // DO NOT overload - maximumUrlsPerTask used to 
reduce threads to 4 + aArteRestVideoInfoTask = new ArteVideoInfoTask(this, createVideosQueue()); + final ConcurrentLinkedQueue videos = new ConcurrentLinkedQueue<>(); + videos.addAll(aArteRestVideoInfoTask.fork().join()); + // + Log.sysLog(getSendername() + " Anzahl video info: " + videos.size()); + // + final ConcurrentLinkedQueue videosWithLink = new ConcurrentLinkedQueue<>(); + final ArteVideoLinkTask aArteRestVideosTask = new ArteVideoLinkTask(this, videos); + videosWithLink.addAll(aArteRestVideosTask.fork().join()); + // + Log.sysLog(getSendername() + " Anzahl video links: " + videosWithLink.size()); + // + return new ArteDtoVideo2FilmTask(this, new ConcurrentLinkedQueue<>(videosWithLink), getSendername()); + + } catch (final Exception ex) { + LOG.fatal("Exception in {} crawler.", getSendername(), ex); + } + return null; + } + + private ConcurrentLinkedQueue createVideosQueue() { + int maxPages = getMaxPagesForOverview(); + final ConcurrentLinkedQueue root = new ConcurrentLinkedQueue<>(); + String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, getLanguage().toString().toLowerCase()); + root.add(new TopicUrlDTO("all videos1", rootUrl)); + if (maxPages >= 100) { + String rootUrl2 = String.format(ArteConstants.VIDEOS_URL_ALT, 1, getLanguage().toString().toLowerCase()); + root.add(new TopicUrlDTO("all videos2", rootUrl2)); + } + return root; + } + + private int getMaxPagesForOverview() { + final int naturalLimit = Math.min(100, getMaximumSubpages()); + String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, getLanguage().toString().toLowerCase()); + String[] path = {"meta", "videos", "pages"}; + try { + final Map headers = Map.of( + "Accept", "application/json", + "Content-Type", "application/json", + "Authorization", ArteConstants.API_TOKEN + ); + JsonElement element = jsoupConnection.requestBodyAsJsonElement(rootUrl, headers); + Optional pages = JsonUtils.getElementValueAsInteger(element, path); + if (pages.isPresent()) { + return Math.min(pages.get(), 
naturalLimit); + } + } catch (IOException e) { + LOG.error("getMaxPagesForOverview", e); + } + return naturalLimit; + } + + private int getMaximumSubpages() { + if (CrawlerTool.loadLongMax()) { + return 10; + } else { + return 1; + } + } +} + + \ No newline at end of file diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCrawler_EN.java b/src/main/java/mServer/crawler/sender/arte/ArteCrawler_EN.java new file mode 100644 index 000000000..b25c2eec9 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/ArteCrawler_EN.java @@ -0,0 +1,17 @@ +package mServer.crawler.sender.arte; + +import de.mediathekview.mlib.Const; +import mServer.crawler.FilmeSuchen; + +public class ArteCrawler_EN extends ArteCrawler { + + public ArteCrawler_EN(FilmeSuchen ssearch, int startPrio) { + super(ssearch, startPrio, Const.ARTE_EN); + } + + @Override + protected ArteLanguage getLanguage() { + return ArteLanguage.EN; + } + +} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCrawler_ES.java b/src/main/java/mServer/crawler/sender/arte/ArteCrawler_ES.java new file mode 100644 index 000000000..c0918cfb8 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/ArteCrawler_ES.java @@ -0,0 +1,17 @@ +package mServer.crawler.sender.arte; + +import de.mediathekview.mlib.Const; +import mServer.crawler.FilmeSuchen; + +public class ArteCrawler_ES extends ArteCrawler { + + public ArteCrawler_ES(FilmeSuchen ssearch, int startPrio) { + super(ssearch, startPrio, Const.ARTE_ES); + } + + @Override + protected ArteLanguage getLanguage() { + return ArteLanguage.ES; + } + +} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCrawler_FR.java b/src/main/java/mServer/crawler/sender/arte/ArteCrawler_FR.java new file mode 100644 index 000000000..97f31bc4d --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/ArteCrawler_FR.java @@ -0,0 +1,17 @@ +package mServer.crawler.sender.arte; + +import de.mediathekview.mlib.Const; +import mServer.crawler.FilmeSuchen; + +public class 
ArteCrawler_FR extends ArteCrawler { + + public ArteCrawler_FR(FilmeSuchen ssearch, int startPrio) { + super(ssearch, startPrio, Const.ARTE_FR); + } + + @Override + protected ArteLanguage getLanguage() { + return ArteLanguage.FR; + } + +} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCrawler_IT.java b/src/main/java/mServer/crawler/sender/arte/ArteCrawler_IT.java new file mode 100644 index 000000000..39e831081 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/ArteCrawler_IT.java @@ -0,0 +1,17 @@ +package mServer.crawler.sender.arte; + +import de.mediathekview.mlib.Const; +import mServer.crawler.FilmeSuchen; + +public class ArteCrawler_IT extends ArteCrawler { + + public ArteCrawler_IT(FilmeSuchen ssearch, int startPrio) { + super(ssearch, startPrio, Const.ARTE_IT); + } + + @Override + protected ArteLanguage getLanguage() { + return ArteLanguage.IT; + } + +} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCrawler_PL.java b/src/main/java/mServer/crawler/sender/arte/ArteCrawler_PL.java new file mode 100644 index 000000000..48b0d2203 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/ArteCrawler_PL.java @@ -0,0 +1,16 @@ +package mServer.crawler.sender.arte; + +import de.mediathekview.mlib.Const; +import mServer.crawler.FilmeSuchen; + +public class ArteCrawler_PL extends ArteCrawler { + public ArteCrawler_PL(FilmeSuchen ssearch, int startPrio) { + super(ssearch, startPrio, Const.ARTE_PL); + } + + @Override + protected ArteLanguage getLanguage() { + return ArteLanguage.PL; + } + +} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteDatenFilmDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteDatenFilmDeserializer.java deleted file mode 100644 index 648df0931..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteDatenFilmDeserializer.java +++ /dev/null @@ -1,51 +0,0 @@ -package mServer.crawler.sender.arte; - -import java.lang.reflect.Type; -import java.util.ArrayList; -import 
java.util.Collection; -import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; - -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonParseException; - -import de.mediathekview.mlib.daten.DatenFilm; -import de.mediathekview.mlib.daten.ListeFilme; -import java.util.Set; - -public class ArteDatenFilmDeserializer implements JsonDeserializer { - - private static final String JSON_ELEMENT_VIDEOS = "videos"; - - private final String langCode; - private final String senderName; - - public ArteDatenFilmDeserializer(String aLangCode, String aSenderName) { - langCode = aLangCode; - senderName = aSenderName; - } - - @Override - public ListeFilme deserialize(JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) throws JsonParseException { - ListeFilme listeFilme = new ListeFilme(); - - Collection futureFilme = new ArrayList<>(); - for (JsonElement jsonElement : aJsonElement.getAsJsonObject().get(JSON_ELEMENT_VIDEOS).getAsJsonArray()) { - Set films = new ArteJsonObjectToDatenFilmCallable(jsonElement.getAsJsonObject(), langCode, senderName).call(); - for (DatenFilm film : films) { - futureFilme.add(film); - } - } - - final List list = futureFilme.parallelStream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); - listeFilme.addAll(list); - list.clear(); - - return listeFilme; - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteHttpClient.java b/src/main/java/mServer/crawler/sender/arte/ArteHttpClient.java deleted file mode 100644 index 94b974e5c..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteHttpClient.java +++ /dev/null @@ -1,116 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.common.util.concurrent.RateLimiter; -import com.google.gson.Gson; -import de.mediathekview.mlib.Config; -import de.mediathekview.mlib.tool.Log; -import 
de.mediathekview.mlib.tool.MVHttpClient; -import okhttp3.OkHttpClient; -import okhttp3.Request; -import okhttp3.Request.Builder; -import okhttp3.Response; -import okhttp3.ResponseBody; -import org.apache.logging.log4j.Logger; - -import java.io.IOException; -import java.util.concurrent.TimeUnit; -import java.util.logging.Level; -import mServer.crawler.FilmeSuchen; -import mServer.crawler.RunSender; - -/** - * Hilfsklasse für Arte Requests - */ -public class ArteHttpClient { - - public static final String AUTH_HEADER = "Authorization"; - public static final String AUTH_TOKEN = "Bearer Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA"; - public static final String USER_AGENT = "User-Agent"; - public static final String USER_AGENT_VALUE = "Mozilla/5.0"; - private static final Builder BUILDER_OPA; - private static final Builder BUILDER; - - private static final RateLimiter LIMITER; - - static { - BUILDER_OPA = new Request.Builder().addHeader(USER_AGENT, USER_AGENT_VALUE) - .addHeader(AUTH_HEADER, AUTH_TOKEN); - - BUILDER = new Request.Builder().addHeader(USER_AGENT, USER_AGENT_VALUE); - LIMITER = RateLimiter.create(0.5); - } - - private static Request createRequest(String aUrl) { - Builder b; - if (aUrl.contains("/api/opa/")) { - b = BUILDER_OPA; - } else { - b = BUILDER; - } - - return b.url(aUrl).build(); - } - - public static T executeRequest(final String sender, Logger logger, Gson gson, String aUrl, Class aDtoType) { - T result = null; - - java.util.logging.Logger x = java.util.logging.Logger.getLogger(OkHttpClient.class.getName()); - x.setLevel(Level.FINE); - - try { - Request request = createRequest(aUrl); - - boolean stop = false; - - int count = 0; - do { - if (!aUrl.contains("www.arte.tv")) { - LIMITER.acquire(); - } - - try (Response response = MVHttpClient.getInstance().getHttpClient().newCall(request).execute(); - ResponseBody body = response.body()) { - count++; - FilmeSuchen.listeSenderLaufen.inc(sender, 
RunSender.Count.ANZAHL); - //response can be successful but empty...and we have to close both! - if (response.isSuccessful() && body != null) { - final String content = body.string(); - result = gson.fromJson(content, aDtoType); - FilmeSuchen.listeSenderLaufen.inc(sender, RunSender.Count.SUM_DATA_BYTE, content.length()); - FilmeSuchen.listeSenderLaufen.inc(sender, RunSender.Count.SUM_TRAFFIC_BYTE, content.length()); - stop = true; - } else { - if (response.code() != 429) { - logger.error(String.format("ARTE Request '%s' failed: %s", aUrl, response.code())); - Log.sysLog(String.format("ARTE Request '%s' failed: %s", aUrl, response.code())); - FilmeSuchen.listeSenderLaufen.inc(sender, RunSender.Count.FEHLER); - stop = true; - } else { - // bei 429 (too many requests) warten und nochmal versuchen - // Wartezeit von 60s aus Header Retry-After - String retryAfter = response.header("Retry-After", ""); - Log.sysLog("429: " + aUrl + " - retry after: " + retryAfter); - try { - TimeUnit.MILLISECONDS.sleep(60000); - FilmeSuchen.listeSenderLaufen.inc(sender, RunSender.Count.FEHLVERSUCHE); - } catch (InterruptedException ignored) { - } - if (count > 3) { - stop = true; - FilmeSuchen.listeSenderLaufen.inc(sender, RunSender.Count.FEHLER); - Log.errorLog(894330765, "ArteHttpClient failed - " + aUrl); - } - } - } - - } - } while (!stop && !Config.getStop()); - - } catch (IOException ex) { - logger.error("Beim laden der Filme für Arte kam es zu Verbindungsproblemen.", ex); - Log.errorLog(3895449, ex); - } - - return result; - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteJsonObjectToDatenFilmCallable.java b/src/main/java/mServer/crawler/sender/arte/ArteJsonObjectToDatenFilmCallable.java deleted file mode 100644 index 8b6cfc1e7..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteJsonObjectToDatenFilmCallable.java +++ /dev/null @@ -1,56 +0,0 @@ -package mServer.crawler.sender.arte; - -import java.util.concurrent.Callable; - -import 
de.mediathekview.mlib.tool.Log; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import com.google.gson.JsonObject; - -import de.mediathekview.mlib.daten.DatenFilm; -import java.util.HashSet; -import java.util.Set; - -public class ArteJsonObjectToDatenFilmCallable implements Callable> { - - private static final Logger LOG = LogManager.getLogger(ArteJsonObjectToDatenFilmCallable.class); - - private static final String JSON_ELEMENT_KEY_PROGRAM_ID = "programId"; - - private final JsonObject jsonObject; - private final String langCode; - private final String senderName; - - public ArteJsonObjectToDatenFilmCallable(JsonObject aJsonObjec, String aLangCode, String aSenderName) { - jsonObject = aJsonObjec; - langCode = aLangCode; - senderName = aSenderName; - } - - @Override - public Set call() { - Set films = new HashSet<>(); - try { - if (isValidProgramObject(jsonObject)) { - String programId = getElementValue(jsonObject, JSON_ELEMENT_KEY_PROGRAM_ID); - films = new ArteProgramIdToDatenFilmCallable(programId, langCode, senderName).call(); - } - } catch (Exception e) { - e.printStackTrace(); - LOG.error(e); - Log.errorLog(348833773, e); - } - - return films; - } - - private static String getElementValue(JsonObject jsonObject, String elementName) { - return !jsonObject.get(elementName).isJsonNull() ? jsonObject.get(elementName).getAsString() : ""; - } - - private static boolean isValidProgramObject(JsonObject programObject) { - return programObject.has(JSON_ELEMENT_KEY_PROGRAM_ID) - && !programObject.get(JSON_ELEMENT_KEY_PROGRAM_ID).isJsonNull(); - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteLanguage.java b/src/main/java/mServer/crawler/sender/arte/ArteLanguage.java new file mode 100644 index 000000000..cdfb008a7 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/ArteLanguage.java @@ -0,0 +1,21 @@ +package mServer.crawler.sender.arte; + +/** The supported arte languages. 
*/ +public enum ArteLanguage { + DE("DE"), + FR("FR"), + EN("EN"), + ES("ES"), + PL("PL"), + IT("IT"); + + private final String languageCode; + + ArteLanguage(final String aLanguageCode) { + languageCode = aLanguageCode; + } + + public String getLanguageCode() { + return languageCode; + } +} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteListBaseDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteListBaseDeserializer.java deleted file mode 100644 index d2b9f9a9e..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteListBaseDeserializer.java +++ /dev/null @@ -1,64 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import de.mediathekview.mlib.tool.Log; -import mServer.crawler.sender.base.JsonUtils; -import mServer.crawler.sender.base.UrlUtils; - -import java.util.Optional; - -public abstract class ArteListBaseDeserializer { - - private static final String JSON_ELEMENT_DATA = "data"; - private static final String JSON_ELEMENT_PROGRAMID = "programId"; - private static final String JSON_ELEMENT_PAGINATION = "pagination"; - private static final String JSON_ELEMENT_LINKS = "links"; - private static final String JSON_ELEMENT_NEXT = "next"; - - private static String buildUrl(String nextUrl) { - final String baseUrl = UrlUtils.getBaseUrl(nextUrl); - return UrlUtils.addDomainIfMissing( - nextUrl - .replace("/api/emac/", "/api/rproxy/emac/") - // fix non reachable host - .replace(baseUrl, "https://www.arte.tv") - , "https://www.arte.tv"); - } - - protected Optional parsePagination(JsonObject jsonObject) { - if (jsonObject.has(JSON_ELEMENT_PAGINATION) && !jsonObject.get(JSON_ELEMENT_PAGINATION).isJsonNull()) { - final JsonObject pagionationObject = jsonObject.get(JSON_ELEMENT_PAGINATION).getAsJsonObject(); - if (pagionationObject.has(JSON_ELEMENT_LINKS)) { - final JsonObject linksObject = pagionationObject.get(JSON_ELEMENT_LINKS).getAsJsonObject(); - final Optional nextUrl 
= JsonUtils.getAttributeAsString(linksObject, JSON_ELEMENT_NEXT); - if (nextUrl.isPresent()) { - return Optional.of(buildUrl(nextUrl.get())); - } - } - } - return Optional.empty(); - } - - protected void extractProgramIdFromData(JsonObject jsonObectWithData, ArteCategoryFilmsDTO dto) { - if (jsonObectWithData.has(JSON_ELEMENT_DATA)) { - for (JsonElement dataElement : jsonObectWithData.get(JSON_ELEMENT_DATA).getAsJsonArray()) { - if (!dataElement.getAsJsonObject().get(JSON_ELEMENT_PROGRAMID).isJsonNull()) { - Optional programId = JsonUtils.getAttributeAsString(dataElement.getAsJsonObject(), JSON_ELEMENT_PROGRAMID); - if (programId.isPresent()) { - if (programId.get().startsWith("RC-")) { - try { - long collectionId = Long.parseLong(programId.get().replace("RC-", "")); - dto.addCollection(String.format("RC-%06d", collectionId)); - } catch (NumberFormatException e) { - Log.errorLog(12834939, "Invalid collection id: " + programId); - } - } else { - dto.addProgramId(programId.get()); - } - } - } - } - } - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteProgramIdToDatenFilmCallable.java b/src/main/java/mServer/crawler/sender/arte/ArteProgramIdToDatenFilmCallable.java deleted file mode 100644 index 100c258a1..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteProgramIdToDatenFilmCallable.java +++ /dev/null @@ -1,142 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import de.mediathekview.mlib.daten.DatenFilm; -import de.mediathekview.mlib.tool.Log; -import mServer.crawler.CrawlerTool; -import mServer.crawler.FilmeSuchen; -import mServer.crawler.RunSender; -import mServer.crawler.sender.base.GeoLocations; -import mServer.crawler.sender.base.Qualities; -import mServer.tool.MserverDatumZeit; -import org.apache.commons.lang3.time.FastDateFormat; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.time.LocalTime; -import 
java.util.Calendar; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.Callable; - -/** - * Liest anhand einer ProgramId die Daten eines Films - */ -public class ArteProgramIdToDatenFilmCallable implements Callable> { - - private static final Logger LOG = LogManager.getLogger(ArteProgramIdToDatenFilmCallable.class); - - private static final String ARTE_VIDEO_INFORMATION_URL_PATTERN = "https://www.arte.tv/hbbtvv2/services/web/index.php/OPA/v3/streams/%s/SHOW/%s"; - private static final String ARTE_VIDEO_INFORMATION_URL_PATTERN_2 = "https://api.arte.tv/api/opa/v3/programs/%s/%s"; // Für broadcastBeginRounded - - private final FastDateFormat broadcastDateFormat = FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ssX");//2016-10-29T16:15:00Z - - private final String programId; - private final String langCode; - private final String senderName; - - private final Calendar today; - - public ArteProgramIdToDatenFilmCallable(String aProgramId, String aLangCode, String aSenderName) { - programId = aProgramId; - langCode = aLangCode; - senderName = aSenderName; - today = Calendar.getInstance(); - } - - @Override - public Set call() throws Exception { - Set films = new HashSet<>(); - - Gson gson = new GsonBuilder() - .registerTypeAdapter(ArteVideoDTO.class, new ArteVideoDeserializer(senderName)) - .registerTypeAdapter(ArteVideoDetailsDTO.class, new ArteVideoDetailsDeserializer(today)) - .create(); - - String videosUrl = String.format(ARTE_VIDEO_INFORMATION_URL_PATTERN, programId, langCode); - ArteVideoDTO video = ArteHttpClient.executeRequest(senderName, LOG, gson, videosUrl, ArteVideoDTO.class); - - if (video != null) { - - ArteVideoDetailsDTO details = getVideoDetails(gson, programId); - if (details != null) { - LocalTime durationAsTime; - // update duration if video contains different duration and the difference is larger than 1 - // e.g. 
trailers has the original film length in film details but the correct trailer length in video details - // but difference is 1 second, the film length in film details is the correct one - if (!video.getDuration().isZero() - && video.getDuration().getSeconds() != (details.getDuration().getSeconds() + 1)) { - durationAsTime = durationAsTime(video.getDuration().getSeconds()); - } else { - durationAsTime = durationAsTime(details.getDuration().getSeconds()); - } - if (!video.getVideoUrls().isEmpty()) { - if (video.getVideoUrls().containsKey(Qualities.NORMAL)) { - films.add(createFilm(details.getTheme(), details.getWebsite(), details.getTitle(), video.getVideoUrls(), details, durationAsTime, details.getDescription())); - } else { - Log.sysLog(String.format("%s: no normal video url found for film %s, but small/hd", senderName, programId)); - FilmeSuchen.listeSenderLaufen.inc(senderName, RunSender.Count.FEHLER); - } - } - - if (video.getVideoUrlsWithAudioDescription().containsKey(Qualities.NORMAL)) { - films.add(createFilm(details.getTheme(), details.getWebsite(), details.getTitle() + " (Audiodeskription)", video.getVideoUrlsWithAudioDescription(), details, durationAsTime, details.getDescription())); - } - if (video.getVideoUrlsWithSubtitle().containsKey(Qualities.NORMAL)) { - films.add(createFilm(details.getTheme(), details.getWebsite(), details.getTitle() + " (mit Untertitel)", video.getVideoUrlsWithSubtitle(), details, durationAsTime, details.getDescription())); - } - if (video.getVideoUrlsOriginal().containsKey(Qualities.NORMAL)) { - films.add(createFilm(details.getTheme(), details.getWebsite(), details.getTitle() + " (Originalversion)", video.getVideoUrlsOriginal(), details, durationAsTime, details.getDescription())); - } - if (video.getVideoUrlsOriginalWithSubtitle().containsKey(Qualities.NORMAL)) { - films.add(createFilm(details.getTheme(), details.getWebsite(), details.getTitle() + " (Originalversion mit Untertitel)", video.getVideoUrlsOriginalWithSubtitle(), 
details, durationAsTime, details.getDescription())); - } - } else { - Log.errorLog(8572677, "arte: no program found " + programId); - } - } else { - Log.errorLog(84572678, "arte: no video found " + programId); - } - - return films; - } - - private ArteVideoDetailsDTO getVideoDetails(Gson gson, String programId) { - - //https://api.arte.tv/api/opa/v3/programs/[language:de/fr]/[programId] - String videosUrlVideoDetails2 = String.format(ARTE_VIDEO_INFORMATION_URL_PATTERN_2, langCode, programId); - return ArteHttpClient.executeRequest(senderName, LOG, gson, videosUrlVideoDetails2, ArteVideoDetailsDTO.class); - } - - private DatenFilm createFilm(String thema, String urlWeb, String titel, Map videos, ArteVideoDetailsDTO details, LocalTime durationAsTime, String beschreibung) { - - String broadcastBegin = details.getBroadcastBegin(); - String date = MserverDatumZeit.formatDate(broadcastBegin, broadcastDateFormat); - String time = MserverDatumZeit.formatTime(broadcastBegin, broadcastDateFormat); - - DatenFilm film = new DatenFilm(senderName, thema, urlWeb, titel, videos.get(Qualities.NORMAL), "" /*urlRtmp*/, - date, time, durationAsTime.toSecondOfDay(), beschreibung); - if (videos.containsKey(Qualities.HD)) { - CrawlerTool.addUrlHd(film, videos.get(Qualities.HD)); - } - if (videos.containsKey(Qualities.SMALL)) { - CrawlerTool.addUrlKlein(film, videos.get(Qualities.SMALL)); - } - - if (details.getGeoLocation() != GeoLocations.GEO_NONE) { - film.arr[DatenFilm.FILM_GEO] = details.getGeoLocation().getDescription(); - } - - return film; - } - - private LocalTime durationAsTime(long aDurationInSeconds) { - LocalTime localTime = LocalTime.MIN; - - localTime = localTime.plusSeconds(aDurationInSeconds); - - return localTime; - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteVideoTypeMapper.java b/src/main/java/mServer/crawler/sender/arte/ArteRestVideoTypeMapper.java similarity index 79% rename from src/main/java/mServer/crawler/sender/arte/ArteVideoTypeMapper.java 
rename to src/main/java/mServer/crawler/sender/arte/ArteRestVideoTypeMapper.java index e89ac1961..85f8693b5 100644 --- a/src/main/java/mServer/crawler/sender/arte/ArteVideoTypeMapper.java +++ b/src/main/java/mServer/crawler/sender/arte/ArteRestVideoTypeMapper.java @@ -1,40 +1,61 @@ package mServer.crawler.sender.arte; import de.mediathekview.mlib.Const; +import mServer.crawler.sender.base.Qualities; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.util.Optional; -public class ArteVideoTypeMapper { +public class ArteRestVideoTypeMapper { - private static final Logger LOG = LogManager.getLogger(ArteVideoTypeMapper.class); + private static final Logger LOG = LogManager.getLogger(ArteRestVideoTypeMapper.class); - private ArteVideoTypeMapper() { + private ArteRestVideoTypeMapper() {} + + public static Optional mapQuality(String quality) { + switch (quality) { + case "EQ": + return Optional.of(Qualities.NORMAL); + case "HQ": + return Optional.of(Qualities.SMALL); + case "SQ": + return Optional.of(Qualities.HD); + case "MQ": + return Optional.empty(); + case "XQ": + return Optional.empty(); + default: + LOG.debug("unknown quality: {}", quality); + return Optional.empty(); + } } public static Optional map(String sender, String code) { switch (sender) { case Const.ARTE_DE: return mapGerman(code); - case MediathekArte.ARTE_EN: + case Const.ARTE_EN: return mapEnglish(code); - case MediathekArte.ARTE_ES: + case Const.ARTE_ES: return mapSpanish(code); case Const.ARTE_FR: return mapFrench(code); - case MediathekArte.ARTE_IT: + case Const.ARTE_IT: return mapItalian(code); - case MediathekArte.ARTE_PL: + case Const.ARTE_PL: return mapPolish(code); default: LOG.debug("unknown sender: {}", sender); return Optional.empty(); } } + private static Optional mapSpanish(String code) { switch (code) { + case "VE[ESP]": + return Optional.of(ArteVideoType.DEFAULT); case "VE[ESP]-STE[ESP]": case "VO-STE[ESP]": case "VOA-STE[ESP]": @@ -42,9 +63,6 
@@ private static Optional mapSpanish(String code) { case "VOF-STE[ESP]": return Optional.of(ArteVideoType.ORIGINAL_WITH_SUBTITLE); default: - if (code.contains("ESP")) { - LOG.debug("add spanish: {}", code); - } return Optional.empty(); } } @@ -60,9 +78,6 @@ private static Optional mapEnglish(String code) { case "VOF-STE[ANG]": return Optional.of(ArteVideoType.ORIGINAL_WITH_SUBTITLE); default: - if (code.contains("ANG")) { - LOG.debug("add english: {}", code); - } return Optional.empty(); } } diff --git a/src/main/java/mServer/crawler/sender/arte/ArteSubPageDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteSubPageDeserializer.java deleted file mode 100644 index 7dd96c3ab..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteSubPageDeserializer.java +++ /dev/null @@ -1,28 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.gson.*; - -import java.lang.reflect.Type; -import java.util.Optional; - -public class ArteSubPageDeserializer extends ArteListBaseDeserializer implements JsonDeserializer { - private static final String JSON_ELEMENT_VALUE = "value"; - - @Override - public ArteCategoryFilmsDTO deserialize(JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) throws JsonParseException { - final ArteCategoryFilmsDTO dto = new ArteCategoryFilmsDTO(); - - JsonElement rootElement = aJsonElement; - if (aJsonElement.getAsJsonObject().has(JSON_ELEMENT_VALUE)) { - rootElement = aJsonElement.getAsJsonObject().get(JSON_ELEMENT_VALUE); - } - - JsonObject rootObject = rootElement.getAsJsonObject(); - extractProgramIdFromData(rootObject, dto); - - Optional url = parsePagination(rootObject); - url.ifPresent(dto::setNextPageUrl); - - return dto; - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteVideoDTO.java b/src/main/java/mServer/crawler/sender/arte/ArteVideoDTO.java deleted file mode 100644 index 4b2c80c47..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteVideoDTO.java +++ /dev/null @@ 
-1,70 +0,0 @@ -package mServer.crawler.sender.arte; - -import mServer.crawler.sender.base.Qualities; - -import java.time.Duration; -import java.util.EnumMap; -import java.util.Map; - -public class ArteVideoDTO { - - private Duration duration; - private final Map videoUrls; - private final Map videoUrlsWithSubtitle; - private final Map videoUrlsWithAudioDescription; - private final Map videoUrlsOriginal; - private final Map videoUrlsOriginalWithSubtitle; - - public ArteVideoDTO() { - videoUrls = new EnumMap<>(Qualities.class); - videoUrlsWithSubtitle = new EnumMap<>(Qualities.class); - videoUrlsWithAudioDescription = new EnumMap<>(Qualities.class); - videoUrlsOriginal = new EnumMap<>(Qualities.class); - videoUrlsOriginalWithSubtitle = new EnumMap<>(Qualities.class); - duration = Duration.ZERO; - } - - public void addVideo(Qualities aQualities, String aUrl) { - videoUrls.put(aQualities, aUrl); - } - - public void addVideoWithAudioDescription(Qualities qualities, String url) { - videoUrlsWithAudioDescription.put(qualities, url); - } - - public void addVideoWithSubtitle(Qualities qualities, String url) { - videoUrlsWithSubtitle.put(qualities, url); - } - public void addVideoOriginal(Qualities qualities, String url) { - videoUrlsOriginal.put(qualities, url); - } - public void addVideoOriginalWithSubtitle(Qualities qualities, String url) { - videoUrlsOriginalWithSubtitle.put(qualities, url); - } - - public Duration getDuration() { return duration; } - - public Map getVideoUrls() { - return videoUrls; - } - - public Map getVideoUrlsWithAudioDescription() { - return videoUrlsWithAudioDescription; - } - - public Map getVideoUrlsWithSubtitle() { - return videoUrlsWithSubtitle; - } - - public Map getVideoUrlsOriginal() { - return videoUrlsOriginal; - } - - public Map getVideoUrlsOriginalWithSubtitle() { - return videoUrlsOriginalWithSubtitle; - } - - public void setDuration(Duration duration) { - this.duration = duration; - } -} diff --git 
a/src/main/java/mServer/crawler/sender/arte/ArteVideoDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteVideoDeserializer.java deleted file mode 100644 index a36d5a9eb..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteVideoDeserializer.java +++ /dev/null @@ -1,89 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.gson.*; -import mServer.crawler.sender.base.Qualities; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.lang.reflect.Type; -import java.util.Optional; - -public class ArteVideoDeserializer - implements JsonDeserializer { - - private static final Logger LOG = LogManager.getLogger(ArteVideoDeserializer.class); - - private static final String JSON_OBJECT_KEY_PLAYER = "videoStreams"; - private static final String ATTRIBUTE_URL = "url"; - private static final String ATTRIBUTE_QUALITY = "quality"; - private static final String ATTRIBUTE_VERSION_CODE = "audioCode"; - - private final String sender; - - public ArteVideoDeserializer(String aSender) { - this.sender = aSender; - } - - @Override - public ArteVideoDTO deserialize( - JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) { - ArteVideoDTO arteVideoDTO = new ArteVideoDTO(); - if (aJsonElement.isJsonObject() - && aJsonElement.getAsJsonObject().has(JSON_OBJECT_KEY_PLAYER) - && aJsonElement.getAsJsonObject().get(JSON_OBJECT_KEY_PLAYER).isJsonArray()) { - JsonArray videoStreams = - aJsonElement.getAsJsonObject().get(JSON_OBJECT_KEY_PLAYER).getAsJsonArray(); - - videoStreams.forEach( - entry -> { - final JsonObject value = entry.getAsJsonObject(); - - final String code = value.get(ATTRIBUTE_VERSION_CODE).getAsString(); - final String quality = value.get(ATTRIBUTE_QUALITY).getAsString(); - final String url = value.get(ATTRIBUTE_URL).getAsString().replace("http://", "https://"); - - final Optional resolution = mapQuality(quality); - final Optional arteVideoType = ArteVideoTypeMapper.map(sender, 
code); - - if (resolution.isPresent() && arteVideoType.isPresent()) { - switch (arteVideoType.get()) { - case DEFAULT: - arteVideoDTO.addVideo(resolution.get(), url); - break; - case SUBTITLE_INCLUDED: - arteVideoDTO.addVideoWithSubtitle(resolution.get(), url); - break; - case AUDIO_DESCRIPTION: - arteVideoDTO.addVideoWithAudioDescription(resolution.get(), url); - break; - case ORIGINAL_WITH_SUBTITLE: - arteVideoDTO.addVideoOriginalWithSubtitle(resolution.get(), url); - break; - case ORIGINAL: - arteVideoDTO.addVideoOriginal(resolution.get(), url); - break; - } - } - }); - } - - return arteVideoDTO; - } - - private Optional mapQuality(String quality) { - switch (quality) { - case "EQ": - return Optional.of(Qualities.NORMAL); - case "HQ": - return Optional.of(Qualities.SMALL); - case "SQ": - return Optional.of(Qualities.HD); - case "MQ": - case "XQ": - return Optional.empty(); - default: - LOG.debug("unknown quality: {}", quality); - return Optional.empty(); - } - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteVideoDetailsDTO.java b/src/main/java/mServer/crawler/sender/arte/ArteVideoDetailsDTO.java deleted file mode 100644 index 9de3da725..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteVideoDetailsDTO.java +++ /dev/null @@ -1,60 +0,0 @@ -package mServer.crawler.sender.arte; - -import mServer.crawler.sender.base.GeoLocations; - -import java.time.Duration; - -public class ArteVideoDetailsDTO { - private String broadcastBegin = ""; - private GeoLocations geo = GeoLocations.GEO_NONE; - - private String title = ""; - private String theme = ""; - private String description = ""; - private String website = ""; - private Duration duration = Duration.ZERO; - - public String getBroadcastBegin() { - return this.broadcastBegin; - } - - public void setBroadcastBegin(String aBroadcastBegin) { - this.broadcastBegin = aBroadcastBegin; - } - - public GeoLocations getGeoLocation() { - return this.geo; - } - - public void setGeoLocation(GeoLocations 
aGeo) { - this.geo = aGeo; - } - - public String getTitle() { - return title; - } - public void setTitle(String aTitle) { - title = aTitle; - } - public void setTheme(String aTheme) { - theme = aTheme; - } - public void setDescription(String aDescription) { - description = aDescription; - } - public void setWebsite(String aWebsite) { - website = aWebsite; - } - public String getTheme() { - return theme; - } - public String getDescription() { - return description; - } - public String getWebsite() { - return website; - } - - public void setDuration(Duration duration) { this.duration = duration; } - public Duration getDuration() { return duration; } -} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteVideoDetailsDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteVideoDetailsDeserializer.java deleted file mode 100644 index 96e732b9a..000000000 --- a/src/main/java/mServer/crawler/sender/arte/ArteVideoDetailsDeserializer.java +++ /dev/null @@ -1,331 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.gson.JsonArray; -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonParseException; -import java.lang.reflect.Type; -import java.text.ParseException; -import java.time.Duration; -import java.util.Calendar; -import mServer.crawler.sender.base.GeoLocations; -import mServer.tool.DateWithoutTimeComparer; -import org.apache.commons.lang3.time.FastDateFormat; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -public class ArteVideoDetailsDeserializer implements JsonDeserializer { - - private static final String JSON_ELEMENT_KEY_CATEGORY = "category"; - private static final String JSON_ELEMENT_KEY_SUBCATEGORY = "subcategory"; - private static final String JSON_ELEMENT_KEY_NAME = "name"; - private static final String JSON_ELEMENT_KEY_TITLE = "title"; - private 
static final String JSON_ELEMENT_KEY_SUBTITLE = "subtitle"; - private static final String JSON_ELEMENT_KEY_URL = "url"; - private static final String JSON_ELEMENT_KEY_PROGRAM_ID = "programId"; - private static final String JSON_ELEMENT_KEY_SHORT_DESCRIPTION = "shortDescription"; - - private static final String JSON_ELEMENT_BROADCAST_ELTERNKNOTEN_1 = "programs"; - private static final String JSON_ELEMENT_BROADCAST_ELTERNKNOTEN_2 = "broadcastProgrammings"; - private static final String JSON_ELEMENT_BROADCAST = "broadcastBeginRounded"; - private static final String JSON_ELEMENT_BROADCASTTYPE = "broadcastType"; - private static final String JSON_ELEMENT_BROADCAST_VIDEORIGHTS_BEGIN = "videoRightsBegin"; - private static final String JSON_ELEMENT_BROADCAST_CATCHUPRIGHTS_BEGIN = "catchupRightsBegin"; - private static final String JSON_ELEMENT_BROADCAST_CATCHUPRIGHTS_END = "catchupRightsEnd"; - private static final String BROADCASTTTYPE_FIRST = "FIRST_BROADCAST"; - private static final String BROADCASTTTYPE_MINOR_RE = "MINOR_REBROADCAST"; - private static final String BROADCASTTTYPE_MAJOR_RE = "MAJOR_REBROADCAST"; - private static final String ATTRIBUTE_DURATION = "durationSeconds"; - - private static final Logger LOG = LogManager.getLogger(ArteVideoDeserializer.class); - - private final FastDateFormat broadcastDateFormat = FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ssX");//2016-10-29T16:15:00Z - - private final Calendar today; - - public ArteVideoDetailsDeserializer(Calendar aToday) { - today = aToday; - } - - @Override - public ArteVideoDetailsDTO deserialize(JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) throws JsonParseException { - ArteVideoDetailsDTO detailsDTO = new ArteVideoDetailsDTO(); - - if (aJsonElement.isJsonObject() - && aJsonElement.getAsJsonObject().get(JSON_ELEMENT_BROADCAST_ELTERNKNOTEN_1).getAsJsonArray().size() > 0) { - - JsonObject programElement = aJsonElement.getAsJsonObject() - 
.get(JSON_ELEMENT_BROADCAST_ELTERNKNOTEN_1).getAsJsonArray().get(0).getAsJsonObject(); - - String titel = getTitle(programElement); - String thema = getSubject(programElement); - - String beschreibung = getElementValue(programElement, JSON_ELEMENT_KEY_SHORT_DESCRIPTION); - Duration duration = parseDuration(programElement); - - - String urlWeb = getElementValue(programElement, JSON_ELEMENT_KEY_URL); - detailsDTO.setDescription(beschreibung); - detailsDTO.setTheme(thema); - detailsDTO.setTitle(titel); - detailsDTO.setWebsite(urlWeb); - detailsDTO.setDuration(duration); - - JsonArray broadcastArray = programElement.get(JSON_ELEMENT_BROADCAST_ELTERNKNOTEN_2).getAsJsonArray(); - - if (broadcastArray.size() > 0) { - detailsDTO.setBroadcastBegin(getBroadcastDate(broadcastArray)); - } else { - // keine Ausstrahlungen verfügbar => catchupRightsBegin verwenden - // wenn es die auch nicht gibt => videoRightsBegin verwenden - String begin = getElementValue(programElement, JSON_ELEMENT_BROADCAST_CATCHUPRIGHTS_BEGIN); - if (begin.isEmpty()) { - begin = getElementValue(programElement, JSON_ELEMENT_BROADCAST_VIDEORIGHTS_BEGIN); - } - detailsDTO.setBroadcastBegin(begin); - } - - detailsDTO.setGeoLocation(getGeoLocation(programElement)); - } - - return detailsDTO; - } - - private Duration parseDuration(JsonObject programElement) { - long durationValue = 0; - - if (programElement.has(ATTRIBUTE_DURATION)) { - durationValue = programElement.get(ATTRIBUTE_DURATION).getAsLong(); - } - - return Duration.ofSeconds(durationValue); - } - - private static String getSubject(JsonObject programObject) { - String category = ""; - String subcategory = ""; - String subject; - - JsonElement catElement = programObject.get(JSON_ELEMENT_KEY_CATEGORY); - if (!catElement.isJsonNull()) { - JsonObject catObject = catElement.getAsJsonObject(); - category = catObject != null ? 
getElementValue(catObject, JSON_ELEMENT_KEY_NAME) : ""; - } - - JsonElement subcatElement = programObject.get(JSON_ELEMENT_KEY_SUBCATEGORY); - if (!subcatElement.isJsonNull()) { - JsonObject subcatObject = subcatElement.getAsJsonObject(); - subcategory = subcatObject != null ? getElementValue(subcatObject, JSON_ELEMENT_KEY_NAME) : ""; - } - - if (!category.equals(subcategory) && !subcategory.isEmpty()) { - subject = category + " - " + subcategory; - } else { - subject = category; - } - - return subject; - } - - private static String getTitle(JsonObject programObject) { - String title = getElementValue(programObject, JSON_ELEMENT_KEY_TITLE); - String subtitle = getElementValue(programObject, JSON_ELEMENT_KEY_SUBTITLE); - - if (!title.equals(subtitle) && !subtitle.isEmpty()) { - title = title + " - " + subtitle; - } - - return title; - } - - private static boolean isValidProgramObject(JsonObject programObject) { - return programObject.has(JSON_ELEMENT_KEY_TITLE) - && programObject.has(JSON_ELEMENT_KEY_PROGRAM_ID) - && programObject.has(JSON_ELEMENT_KEY_URL) - && !programObject.get(JSON_ELEMENT_KEY_TITLE).isJsonNull() - && !programObject.get(JSON_ELEMENT_KEY_PROGRAM_ID).isJsonNull() - && !programObject.get(JSON_ELEMENT_KEY_URL).isJsonNull(); - } - - private static String getElementValue(JsonObject jsonObject, String elementName) { - return !jsonObject.get(elementName).isJsonNull() ? 
jsonObject.get(elementName).getAsString() : ""; - } - - private GeoLocations getGeoLocation(JsonObject programElement) { - GeoLocations geo = GeoLocations.GEO_NONE; - - if (programElement.has("geoblocking")) { - JsonElement geoElement = programElement.get("geoblocking"); - if (!geoElement.isJsonNull()) { - JsonObject geoObject = geoElement.getAsJsonObject(); - if (!geoObject.isJsonNull() && geoObject.has("code")) { - String code = geoObject.get("code").getAsString(); - switch (code) { - case "DE_FR": - geo = GeoLocations.GEO_DE_FR; - break; - case "EUR_DE_FR": - geo = GeoLocations.GEO_DE_AT_CH_FR; - break; - case "SAT": - geo = GeoLocations.GEO_DE_AT_CH_EU; - break; - case "ALL": - geo = GeoLocations.GEO_NONE; - break; - default: - LOG.debug("New ARTE GeoLocation: " + code); - } - } - } - } - - return geo; - } - - /** - * ermittelt Ausstrahlungsdatum aus der Liste der Ausstrahlungen - * - * @param broadcastArray - * @return - */ - private String getBroadcastDate(JsonArray broadcastArray) { - String broadcastDate = ""; - String broadcastBeginFirst = ""; - String broadcastBeginMajor = ""; - String broadcastBeginMinor = ""; - - // nach Priorität der BroadcastTypen den relevanten Eintrag suchen - // FIRST_BROADCAST => MAJOR_REBROADCAST => MINOR_REBROADCAST - // dabei die "aktuellste" Ausstrahlung verwenden - for (int i = 0; i < broadcastArray.size(); i++) { - JsonObject broadcastObject = broadcastArray.get(i).getAsJsonObject(); - - if (broadcastObject.has(JSON_ELEMENT_BROADCASTTYPE) - && broadcastObject.has(JSON_ELEMENT_BROADCAST)) { - String value = this.getBroadcastDateConsideringCatchupRights(broadcastObject); - - if (!value.isEmpty()) { - String type = broadcastObject.get(JSON_ELEMENT_BROADCASTTYPE).getAsString(); - switch (type) { - case BROADCASTTTYPE_FIRST: - broadcastBeginFirst = value; - break; - case BROADCASTTTYPE_MAJOR_RE: - broadcastBeginMajor = value; - break; - case BROADCASTTTYPE_MINOR_RE: - broadcastBeginMinor = value; - break; - default: - 
LOG.debug("New broadcasttype: " + type); - } - } - } - } - - if (!broadcastBeginFirst.isEmpty()) { - broadcastDate = broadcastBeginFirst; - } else if (!broadcastBeginMajor.isEmpty()) { - broadcastDate = broadcastBeginMajor; - } else if (!broadcastBeginMinor.isEmpty()) { - broadcastDate = broadcastBeginMinor; - } - - // wenn kein Ausstrahlungsdatum vorhanden, dann die erste Ausstrahlung nehmen - // egal, wann die CatchupRights liegen, damit ein "sinnvolles" Datum vorhanden ist - if (broadcastDate.isEmpty()) { - broadcastDate = getBroadcastDateIgnoringCatchupRights(broadcastArray, BROADCASTTTYPE_FIRST); - } - // wenn immer noch leer, dann die Major-Ausstrahlung verwenden - if (broadcastDate.isEmpty()) { - broadcastDate = getBroadcastDateIgnoringCatchupRights(broadcastArray, BROADCASTTTYPE_MAJOR_RE); - } - - return broadcastDate; - } - - /** - * Liefert den Beginn der Ausstrahlung, wenn - heute im Zeitraum von - * CatchUpRights liegt - oder heute vor dem Zeitraum liegt - oder - * CatchUpRights nicht gesetzt ist - * - * @param broadcastObject - * @return der Beginn der Ausstrahlung oder "" - */ - private String getBroadcastDateConsideringCatchupRights(JsonObject broadcastObject) { - String broadcastDate = ""; - - JsonElement elementBegin = broadcastObject.get(JSON_ELEMENT_BROADCAST_CATCHUPRIGHTS_BEGIN); - JsonElement elementEnd = broadcastObject.get(JSON_ELEMENT_BROADCAST_CATCHUPRIGHTS_END); - - if (!elementBegin.isJsonNull() && !elementEnd.isJsonNull()) { - String begin = elementBegin.getAsString(); - String end = elementEnd.getAsString(); - - try { - Calendar beginDate = Calendar.getInstance(); - beginDate.setTime(broadcastDateFormat.parse(begin)); - Calendar endDate = Calendar.getInstance(); - endDate.setTime(broadcastDateFormat.parse(end)); - - if ((DateWithoutTimeComparer.compare(today, beginDate) >= 0 && DateWithoutTimeComparer.compare(today, endDate) <= 0) - || (DateWithoutTimeComparer.compare(today, beginDate) < 0)) { - // wenn das heutige Datum zwischen begin 
und end liegt, - // dann ist es die aktuelle Ausstrahlung - JsonElement elementActual = broadcastObject.get(JSON_ELEMENT_BROADCAST); - if (elementActual != null && !elementActual.isJsonNull()) { - broadcastDate = elementActual.getAsString(); - } - } - - } catch (ParseException ex) { - LOG.debug(ex); - } - } else { - JsonElement broadcastElement = broadcastObject.get(JSON_ELEMENT_BROADCAST); - if (broadcastElement != null && !broadcastElement.isJsonNull()) { - - String broadcast = broadcastElement.getAsString(); - - try { - Calendar broadcastCal = Calendar.getInstance(); - broadcastCal.setTime(broadcastDateFormat.parse(broadcast)); - broadcastDate = broadcast; - - } catch (ParseException ex) { - LOG.debug(ex); - } - } - } - return broadcastDate; - } - - /** - * * - * liefert die erste Ausstrahlung des Typs ohne Berücksichtigung der - * CatchupRights - */ - private static String getBroadcastDateIgnoringCatchupRights(JsonArray broadcastArray, String broadcastType) { - String broadcastDate = ""; - - for (int i = 0; i < broadcastArray.size(); i++) { - JsonObject broadcastObject = broadcastArray.get(i).getAsJsonObject(); - - if (broadcastObject.has(JSON_ELEMENT_BROADCASTTYPE) - && broadcastObject.has(JSON_ELEMENT_BROADCAST)) { - String type = broadcastObject.get(JSON_ELEMENT_BROADCASTTYPE).getAsString(); - - if (type.equals(broadcastType)) { - if (!broadcastObject.get(JSON_ELEMENT_BROADCAST).isJsonNull()) { - broadcastDate = (broadcastObject.get(JSON_ELEMENT_BROADCAST).getAsString()); - } - } - } - } - - return broadcastDate; - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/MediathekArte.java b/src/main/java/mServer/crawler/sender/arte/MediathekArte.java deleted file mode 100644 index 5876bdde3..000000000 --- a/src/main/java/mServer/crawler/sender/arte/MediathekArte.java +++ /dev/null @@ -1,330 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import de.mediathekview.mlib.Config; -import 
de.mediathekview.mlib.Const; -import de.mediathekview.mlib.daten.DatenFilm; -import de.mediathekview.mlib.daten.ListeFilme; -import de.mediathekview.mlib.tool.Log; -import mServer.crawler.CrawlerTool; -import mServer.crawler.FilmeSuchen; -import mServer.crawler.sender.MediathekReader; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.stream.Collectors; - -public class MediathekArte extends MediathekReader { - - /* - * Informationen zu den ARTE-URLs: - * {} sind nur Makierungen, dass es Platzhalter sind, sie gehören nicht zur URL. - * - * Allgemeine URL eines Films: (050169-002-A = ID des Films); (die-spur-der-steine = Titel) - * http://www.arte.tv/de/videos/{050169-002-A}/{die-spur-der-steine} - * - * Alle Sendungen: (Deutsch = DE; Französisch = FR) - * https://api.arte.tv/api/opa/v3/videos?channel={DE} - * - * Informationen zum Film: (050169-002-A = ID des Films); (de für deutsch / fr für französisch) - * https://api.arte.tv/api/player/v1/config/{de}/{050169-002-A}?platform=ARTE_NEXT - * - * Zweite Quelle für Informationen zum Film: (050169-002-A = ID des Films); (de für deutsch / fr für französisch) - * https://api.arte.tv/api/opa/v3/programs/{de}/{050169-002-A} - * - * Hintergrundinfos zum Laden der Filme nach Kategorien im langen Lauf: - * 1. statische Informationen über verfügbare Kategorien laden: URL_STATIC_CONTENT - * 2. für jede Kategorie die Unterkategorien ermitteln: URL_CATEGORY - * 3. für jede Unterkategorie die enthaltenen ProgramId ermitteln: URL_SUBCATEGORY - * 4. 
für alle ProgramIds die Videoinformationen laden (wie kurze Variante) - */ - private static final Logger LOG = LogManager.getLogger(MediathekArte.class); - private static final String ARTE_API_TAG_URL_PATTERN = "https://api.arte.tv/api/opa/v3/videos?channel=%s&arteSchedulingDay=%s"; - - private static final String URL_CATEGORY = "https://www.arte.tv/api/rproxy/emac/v4/%s/web/pages/%s"; - - private static final String[] CATEGORIES = { - "ARS", - "DOR", - "CIN", - "SER", - "ACT", - "CPO", - "SCI", - "DEC", - "HIS" - }; - - private static final String COLLECTION_URL = "https://api.arte.tv/api/opa/v3/programs/%s/%s"; - - private static final DateTimeFormatter ARTE_API_DATEFORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd"); - - public static final String ARTE_EN = "ARTE.EN"; - public static final String ARTE_ES = "ARTE.ES"; - public static final String ARTE_IT = "ARTE.IT"; - public static final String ARTE_PL = "ARTE.PL"; - - private final Map senderLanguages = new HashMap<>(); - - public MediathekArte(FilmeSuchen ssearch, int startPrio) { - super(ssearch, Const.ARTE_DE,/* threads */ 2, /* urlWarten */ 200, startPrio); - } - - - public MediathekArte(FilmeSuchen ssearch, int startPrio, String name) { - super(ssearch, name,/* threads */ 2, /* urlWarten */ 200, startPrio); - } - - @Override - protected synchronized void meldungStart() { - super.meldungStart(); - - senderLanguages.put(Const.ARTE_DE, "de"); - senderLanguages.put(Const.ARTE_FR, "fr"); - if (LocalDate.now().getDayOfYear() % 2 == 0) { - senderLanguages.put(ARTE_EN, "en"); - senderLanguages.put(ARTE_ES, "es"); - } else { - senderLanguages.put(ARTE_IT, "it"); - senderLanguages.put(ARTE_PL, "pl"); - } - - // starte Sprachen Sender, da es sonst zu doppelten Sendern kommen kann - senderLanguages.keySet().forEach(sender -> mlibFilmeSuchen.melden(sender, getMax(), getProgress(), "")); - } - - @Override - protected synchronized void meldungThreadUndFertig() { - // der MediathekReader ist erst fertig wenn nur noch 
ein Thread läuft - // dann zusätzliche Sender, die der Crawler bearbeitet, beenden - if (getThreads() <= 1) { - senderLanguages.keySet().stream() - // DE nicht beenden, das erfolgt durch den Aufruf der Basisklasse - .filter(sender -> !sender.equals(Const.ARTE_DE)) - .forEach(sender -> mlibFilmeSuchen.meldenFertig(sender)); - } - - super.meldungThreadUndFertig(); - } - - //=================================== - // public - //=================================== - @Override - public void addToList() { - meldungStart(); - if (Config.getStop()) { - meldungThreadUndFertig(); - } else { - if (CrawlerTool.loadLongMax()) { - addRecentList(); - //addCategories(); - meldungAddMax(listeThemen.size()); - - for (int t = 0; t < getMaxThreadLaufen(); ++t) { - Thread th = new CategoryLoader(); - th.setName(getSendername() + t); - th.start(); - } - - } else { - addTage(); - meldungAddMax(listeThemen.size()); - for (int t = 0; t < getMaxThreadLaufen(); ++t) { - Thread th = new ThemaLaden(); - th.setName(getSendername() + t); - th.start(); - } - } - } - } - - private void addCategories() { - senderLanguages.forEach((sender, langCode) -> { - for (String category : CATEGORIES) { - String categoryUrl = String.format(URL_CATEGORY, langCode.toLowerCase(), category); - listeThemen.add(new String[]{sender, langCode, category, categoryUrl}); - } - }); - } - - private void addRecentList() { - senderLanguages.forEach((sender, langCode) -> { - String u = String.format("https://www.arte.tv/api/rproxy/emac/v4/%s/web/pages/MOST_RECENT/", langCode); - listeThemen.add(new String[]{sender, langCode, "recent", u}); - }); - } - - private void addTage() { - senderLanguages.forEach((sender, langCode) -> { - // http://www.arte.tv/guide/de/plus7/videos?day=-2&page=1&isLoading=true&sort=newest&country=DE - for (int i = 0; i <= 14; ++i) { - String u = String.format(ARTE_API_TAG_URL_PATTERN, langCode.toUpperCase(), LocalDate.now().minusDays(i).format(ARTE_API_DATEFORMATTER)); - listeThemen.add(new 
String[]{sender, u}); - } - for (int i = 1; i <= 21; ++i) { - String u = String.format(ARTE_API_TAG_URL_PATTERN, langCode.toUpperCase(), LocalDate.now().plusDays(i).format(ARTE_API_DATEFORMATTER)); - listeThemen.add(new String[]{sender, u}); - } - }); - } - - class ThemaLaden extends Thread { - - private final Map senderGsonMap; - - public ThemaLaden() { - senderGsonMap = new HashMap<>(); - senderLanguages.forEach((sender, language) -> senderGsonMap.put(sender, new GsonBuilder().registerTypeAdapter(ListeFilme.class, new ArteDatenFilmDeserializer(language, sender)).create())); - } - - @Override - public void run() { - try { - meldungAddThread(); - String[] link; - while (!Config.getStop() && (link = listeThemen.getListeThemen()) != null) { - meldungProgress(link[1]); - addFilmeForTag(link[0], link[1]); - } - } catch (Exception ex) { - Log.errorLog(894330854, ex, ""); - } - meldungThreadUndFertig(); - } - - private void addFilmeForTag(String sender, String aUrl) { - - ListeFilme loadedFilme = ArteHttpClient.executeRequest(sender, LOG, senderGsonMap.get(sender), aUrl, ListeFilme.class); - if (loadedFilme != null) { - loadedFilme.forEach(film -> addFilm(film)); - } - } - } - - /** - * Lädt die Filme für jede Kategorie - */ - class CategoryLoader extends Thread { - - private int subPage = 0; - - @Override - public void run() { - try { - meldungAddThread(); - String[] link; - while (!Config.getStop() && (link = listeThemen.getListeThemen()) != null) { - meldungProgress(link[2] + "/" + link[3] /* url */); - loadCategory(link[0], link[1], link[2], link[3]); - } - } catch (Exception ex) { - Log.errorLog(894330854, ex, ""); - } - meldungThreadUndFertig(); - } - - private void loadCategory(String sender, String langCode, String aCategory, String aUrl) { - Gson gson = new GsonBuilder() - .registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteCategoryFilmListDeserializer()) - .create(); - Gson gsonCollectionParent = new GsonBuilder() - 
.registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteCollectionParentDeserializer()) - .create(); - Gson gsonCollectionChild = new GsonBuilder() - .registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteCollectionChildDeserializer()) - .create(); - Gson gsonNextPage =new GsonBuilder() - .registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteSubPageDeserializer()).create(); - - ArteCategoryFilmsDTO dto = loadSubCategoryPage(gson, sender, aUrl); - if (dto != null) { - loadCollections(sender, langCode, gsonCollectionParent, gsonCollectionChild, dto); - Log.sysLog(String.format("%s: category %s: %d programs, %d collections", sender, aCategory, dto.getProgramIds().size(), dto.getCollectionIds().size())); - // alle programIds verarbeiten - ListeFilme loadedFilme = loadPrograms(sender, langCode, dto); - loadedFilme.forEach(film -> addFilm(film)); - Log.sysLog(String.format("%s: category %s: %d Filme", sender, aCategory, loadedFilme.size())); - if (dto.hasNextPage()) { - loadNextPage(sender, langCode, aCategory, dto.getNextPageUrl(), gsonCollectionParent, gsonCollectionChild, gsonNextPage); - } - } - } - - private void loadNextPage(String sender, String langCode, String aCategory, String url, Gson gsonCollectionParent, Gson gsonCollectionChild, Gson gsonNextPage) { - subPage++; - ArteCategoryFilmsDTO dto = loadSubCategoryPage(gsonNextPage, sender, url); - if (dto != null) { - loadCollections(sender, langCode, gsonCollectionParent, gsonCollectionChild, dto); - Log.sysLog(String.format("%s: category %s: %d programs, %d collections", sender, aCategory, dto.getProgramIds().size(), dto.getCollectionIds().size())); - // alle programIds verarbeiten - ListeFilme loadedFilme = loadPrograms(sender, langCode, dto); - loadedFilme.forEach(film -> addFilm(film)); - Log.sysLog(String.format("%s: category %s - page %d: %d Filme", sender, aCategory, subPage, loadedFilme.size())); - if (dto.hasNextPage() && shouldLoadNextPage(sender)) { - loadNextPage(sender, langCode, aCategory, 
dto.getNextPageUrl(), gsonCollectionParent, gsonCollectionChild, gsonNextPage); - } - } - } - - private boolean shouldLoadNextPage(String sender) { - if (Const.ARTE_DE.equals(sender)) { - return subPage < 10; - } - - return subPage < 2; - } - - private void loadCollections(String sender, String langCode, Gson gsonParent, Gson gsonChild, ArteCategoryFilmsDTO dto) { - dto.getCollectionIds().forEach(collectionId -> { - final String url = String.format(COLLECTION_URL, langCode, collectionId); - try { - final ArteCategoryFilmsDTO parentDto = ArteHttpClient.executeRequest(sender, LOG, gsonParent, url, ArteCategoryFilmsDTO.class); - if (parentDto != null) { - parentDto.getCollectionIds().forEach(childCollectionId -> { - final String urlChild = String.format(COLLECTION_URL, langCode, childCollectionId); - final ArteCategoryFilmsDTO collectionDto = ArteHttpClient.executeRequest(sender, LOG, gsonChild, urlChild, ArteCategoryFilmsDTO.class); - if (collectionDto != null) { - collectionDto.getProgramIds().forEach(dto::addProgramId); - } - }); - } - } catch (Exception e) { - Log.errorLog(894330855, e, url); - } - }); - } - - private ListeFilme loadPrograms(String sender, String langCode, ArteCategoryFilmsDTO dto) { - ListeFilme listeFilme = new ListeFilme(); - - Collection futureFilme = new ArrayList<>(); - dto.getProgramIds().forEach(programId -> { - try { - Set films = new ArteProgramIdToDatenFilmCallable(programId, langCode, sender).call(); - for (DatenFilm film : films) { - futureFilme.add(film); - } - } catch (Exception exception) { - LOG.error("Es ist ein Fehler beim lesen der Arte Filme aufgetreten.", exception); - } - }); - - final List list = futureFilme.parallelStream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); - listeFilme.addAll(list); - list.clear(); - - return listeFilme; - } - - private ArteCategoryFilmsDTO loadSubCategoryPage(Gson gson, String sender, String aUrl) { - return ArteHttpClient.executeRequest(sender, LOG, gson, aUrl, 
ArteCategoryFilmsDTO.class); - } - } -} diff --git a/src/main/java/mServer/crawler/sender/arte/json/ArteSubtitleLinkDto.java b/src/main/java/mServer/crawler/sender/arte/json/ArteSubtitleLinkDto.java new file mode 100644 index 000000000..1bf6c6ce1 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/json/ArteSubtitleLinkDto.java @@ -0,0 +1,55 @@ +package mServer.crawler.sender.arte.json; + +import java.util.Optional; + +public class ArteSubtitleLinkDto { + private Optional arteCode; + private Optional version; + private Optional iso6392Code; + private Optional iso6391Code; + private Optional label; + private Optional closedCaptioning; + private Optional burned; + private Optional filename; + public ArteSubtitleLinkDto(Optional arteCode, Optional version, Optional iso6392Code, + Optional iso6391Code, Optional label, Optional closedCaptioning, Optional burned, + Optional filename) { + super(); + this.arteCode = arteCode; + this.version = version; + this.iso6392Code = iso6392Code; + this.iso6391Code = iso6391Code; + this.label = label; + this.closedCaptioning = closedCaptioning; + this.burned = burned; + this.filename = filename; + } + public Optional getArteCode() { + return arteCode; + } + public Optional getVersion() { + return version; + } + public Optional getIso6392Code() { + return iso6392Code; + } + public Optional getIso6391Code() { + return iso6391Code; + } + public Optional getLabel() { + return label; + } + public Optional getClosedCaptioning() { + return closedCaptioning; + } + public Optional getBurned() { + return burned; + } + public Optional getFilename() { + return filename; + } + + + + +} diff --git a/src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDeserializer.java b/src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDeserializer.java new file mode 100644 index 000000000..ed46cb163 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDeserializer.java @@ -0,0 +1,153 @@ +package 
mServer.crawler.sender.arte.json; + +import java.lang.reflect.Type; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; + +import com.google.gson.JsonArray; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import mServer.crawler.sender.base.JsonUtils; +import mServer.crawler.sender.base.PagedElementListDTO; + +public class ArteVideoInfoDeserializer implements JsonDeserializer> { + private static final String EXTRAIT = "EXTRAIT"; + private static final HashSet INCLUDE_KIND = new HashSet<>(); + static { + INCLUDE_KIND.add("SHOW"); + INCLUDE_KIND.add("MANUAL_CLIP"); + INCLUDE_KIND.add("BONUS"); + } + + private static final String[] TAG_NEXT_PAGE_NEXT = {"meta","videos","links","next","href"}; + private static final String[] TAG_NEXT_PAGE_TOTAL = {"meta","videos","totalCount"}; + private static final String[] TAG_NEXT_PAGE_PAGES = {"meta","videos","pages"}; + private static final String[] TAG_NEXT_PAGE_PAGE = {"meta","videos","page"}; + + private static final String TAG_SUBTITLES = "subtitles"; + private static final String TAG_SUBTITLES_ARTECODE = "arteCode"; + private static final String TAG_SUBTITLES_VERSION = "version"; + private static final String TAG_SUBTITLES_ISO6392CODE = "iso6392Code"; + private static final String TAG_SUBTITLES_ISO6391CODE = "iso6391Code"; + private static final String TAG_SUBTITLES_LABEL = "label"; + private static final String TAG_SUBTITLES_CLOSEDCAPTIONING = "closedCaptioning"; + private static final String TAG_SUBTITLES_BURNED = "burned"; + private static final String TAG_SUBTITLES_FILENAME = "filename"; + + private static final String TAG_VIDEO_INFO = "videos"; + + private static final String TAG_FIRST_BROADCAST_DATE = "firstBroadcastDate"; + private static final String TAG_ID = "id"; + private static final String 
TAG_PROGRAM_ID = "programId"; + private static final String TAG_CHANNEL = "channel"; + private static final String TAG_LANGUAGE = "language"; + private static final String TAG_KIND = "kind"; + private static final String TAG_CATALOG_TYPE = "catalogType"; + private static final String TAG_PROGRAM_TYPE = "programType"; + private static final String TAG_PLATFORM = "platform"; + private static final String TAG_PLATFORM_LABEL = "platformLabel"; + private static final String TAG_TITLE = "title"; + private static final String TAG_SUBTITLE = "subtitle"; + private static final String TAG_ORIGINAL_TITLE = "originalTitle"; + private static final String TAG_DURATION_SECONDS = "durationSeconds"; + private static final String TAG_SHORT_DESCRIPTION = "shortDescription"; + private static final String TAG_FULL_DESCRIPTION = "fullDescription"; + private static final String TAG_HEADER_TEXT = "headerText"; + private static final String TAG_GEOBLOCKING_ZONE = "geoblockingZone"; + private static final String TAG_URL = "url"; + private static final String TAG_SEASON = "season"; + private static final String TAG_EPISODE = "episode"; + private static final String TAG_BROADCAST_BEGIN = "broadcastBegin"; + private static final String TAG_CREATIONDATE = "creationDate"; + private static final String TAG_BROADCAST_BEGIN_ROUNDED = "broadcastBeginRounded"; + private static final String[] TAG_CATEGORY_CODE = {"category","code"}; + private static final String[] TAG_CATEGORY_NAME = {"category","name"}; + private static final String[] TAG_SUBCATEGORY_NAME = {"subcategory","name"}; + + @Override + public PagedElementListDTO deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) + throws JsonParseException { + final PagedElementListDTO videoUrls = new PagedElementListDTO<>(); + // + Optional nextPage = JsonUtils.getElementValueAsString(json, TAG_NEXT_PAGE_NEXT); + Optional nextPagePages = JsonUtils.getElementValueAsInteger(json, TAG_NEXT_PAGE_PAGES); + Optional 
nextPagePageIndex = JsonUtils.getElementValueAsInteger(json, TAG_NEXT_PAGE_PAGE); + if (nextPage.isPresent() && nextPagePages.isPresent() && nextPagePageIndex.isPresent() && nextPagePageIndex.get() < nextPagePages.get()) { + videoUrls.setNextPage(nextPage); + } + // + final JsonObject searchElement = json.getAsJsonObject(); + final JsonArray itemArray = searchElement.getAsJsonArray(TAG_VIDEO_INFO); + for (JsonElement arrayElement : itemArray) { + parseVideoInfoElement(arrayElement).ifPresent(videoUrls::addElement); + } + return videoUrls; + } + + protected Optional parseVideoInfoElement(final JsonElement arrayElement) { + // EXTRAIT + if (JsonUtils.getElementValueAsString(arrayElement, TAG_PLATFORM).orElse("").equalsIgnoreCase(EXTRAIT) || + !INCLUDE_KIND.contains(JsonUtils.getElementValueAsString(arrayElement, TAG_KIND).orElse("").toUpperCase())) { + return Optional.empty(); + } + // + List arteRestSubtitleLinkDto = new ArrayList<>(); + if (arrayElement.getAsJsonObject().has(TAG_SUBTITLES) && + arrayElement.getAsJsonObject().get(TAG_SUBTITLES).isJsonArray()) { + final JsonArray subtitles = arrayElement.getAsJsonObject().get(TAG_SUBTITLES).getAsJsonArray(); + for (JsonElement subs : subtitles) { + arteRestSubtitleLinkDto.add( + new ArteSubtitleLinkDto( + JsonUtils.getElementValueAsString(subs, TAG_SUBTITLES_ARTECODE), + JsonUtils.getElementValueAsString(subs, TAG_SUBTITLES_VERSION), + JsonUtils.getElementValueAsString(subs, TAG_SUBTITLES_ISO6392CODE), + JsonUtils.getElementValueAsString(subs, TAG_SUBTITLES_ISO6391CODE), + JsonUtils.getElementValueAsString(subs, TAG_SUBTITLES_LABEL), + JsonUtils.getElementValueAsString(subs, TAG_SUBTITLES_CLOSEDCAPTIONING), + JsonUtils.getElementValueAsString(subs, TAG_SUBTITLES_BURNED), + JsonUtils.getElementValueAsString(subs, TAG_SUBTITLES_FILENAME))); + } + } + // + ArteVideoInfoDto arteRestVideoInfoDto = new ArteVideoInfoDto( + JsonUtils.getElementValueAsString(arrayElement, TAG_FIRST_BROADCAST_DATE), + 
JsonUtils.getElementValueAsString(arrayElement, TAG_ID), + JsonUtils.getElementValueAsString(arrayElement, TAG_PROGRAM_ID), + JsonUtils.getElementValueAsString(arrayElement, TAG_CHANNEL), + JsonUtils.getElementValueAsString(arrayElement, TAG_LANGUAGE), + JsonUtils.getElementValueAsString(arrayElement, TAG_KIND), + JsonUtils.getElementValueAsString(arrayElement, TAG_CATALOG_TYPE), + JsonUtils.getElementValueAsString(arrayElement, TAG_PROGRAM_TYPE), + JsonUtils.getElementValueAsString(arrayElement, TAG_PLATFORM), + JsonUtils.getElementValueAsString(arrayElement, TAG_PLATFORM_LABEL), + JsonUtils.getElementValueAsString(arrayElement, TAG_TITLE), + JsonUtils.getElementValueAsString(arrayElement, TAG_SUBTITLE), + JsonUtils.getElementValueAsString(arrayElement, TAG_ORIGINAL_TITLE), + JsonUtils.getElementValueAsString(arrayElement, TAG_DURATION_SECONDS), + JsonUtils.getElementValueAsString(arrayElement, TAG_SHORT_DESCRIPTION), + JsonUtils.getElementValueAsString(arrayElement, TAG_FULL_DESCRIPTION), + JsonUtils.getElementValueAsString(arrayElement, TAG_HEADER_TEXT), + JsonUtils.getElementValueAsString(arrayElement, TAG_GEOBLOCKING_ZONE), + JsonUtils.getElementValueAsString(arrayElement, TAG_URL), + JsonUtils.getElementValueAsString(arrayElement, TAG_SEASON), + JsonUtils.getElementValueAsString(arrayElement, TAG_EPISODE), + JsonUtils.getElementValueAsString(arrayElement, TAG_BROADCAST_BEGIN), + JsonUtils.getElementValueAsString(arrayElement, TAG_BROADCAST_BEGIN_ROUNDED), + JsonUtils.getElementValueAsString(arrayElement, TAG_CATEGORY_CODE), + JsonUtils.getElementValueAsString(arrayElement, TAG_CATEGORY_NAME), + JsonUtils.getElementValueAsString(arrayElement, TAG_SUBCATEGORY_NAME), + JsonUtils.getElementValueAsString(arrayElement, TAG_CREATIONDATE), + JsonUtils.getElementValueAsString(arrayElement, TAG_NEXT_PAGE_PAGE) + ); + + arteRestVideoInfoDto.setSubtitleLinks(arteRestSubtitleLinkDto); + + return Optional.of(arteRestVideoInfoDto); + } + +} diff --git 
a/src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDto.java b/src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDto.java new file mode 100644 index 000000000..33f463369 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDto.java @@ -0,0 +1,198 @@ +package mServer.crawler.sender.arte.json; + +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +import mServer.crawler.sender.arte.ArteConstants; +import mServer.crawler.sender.base.CrawlerUrlDTO; + +public class ArteVideoInfoDto extends CrawlerUrlDTO { + private Optional firstBroadcastDate; + private Optional id; + private Optional programId; + private Optional channel; + private Optional language; + private Optional kind; + private Optional catalogType; + private Optional programType; + private Optional platform; + private Optional platformLabel; + private Optional title; + private Optional subtitle; + private Optional originalTitle; + private Optional durationSeconds; + private Optional shortDescription; + private Optional fullDescription; + private Optional headerText; + private Optional geoblockingZone; + private Optional website; + private Optional season; + private Optional episode; + private Optional broadcastBegin; + private Optional broadcastBeginRounded; + private Optional category; + private Optional categoryName; + private Optional subcategoryName; + private Optional creationDate; + private Optional pageIndex; + private List videoLinks; + private List subtitleLinks; + + // ONLY for unit tests + public ArteVideoInfoDto(Optional id, Optional programId,Optional kind, Optional language) { + super(String.format(ArteConstants.VIDEO_URL, programId.get(), kind.get(), language.get())); + this.id = id; + } + + public ArteVideoInfoDto(Optional firstBroadcastDate, Optional id, Optional programId, Optional channel, Optional language, + Optional kind, Optional catalogType, Optional programType, Optional platform, Optional platformLabel, 
Optional title, + Optional subtitle, Optional originalTitle, Optional durationSeconds, Optional shortDescription, Optional fullDescription, + Optional headerText, Optional geoblockingZone, Optional url, Optional season, Optional episode, Optional broadcastBegin, + Optional broadcastBeginRounded,Optional category, Optional categoryName, Optional subcategoryName, Optional creationDate, Optional pageIndex) { + super(String.format(ArteConstants.VIDEO_URL, programId.orElseThrow(), kind.orElseThrow(), language.orElseThrow())); + this.firstBroadcastDate = firstBroadcastDate; + this.id = id; + this.programId = programId; + this.channel = channel; + this.language = language; + this.kind = kind; + this.catalogType = catalogType; + this.programType = programType; + this.platform = platform; + this.platformLabel = platformLabel; + this.title = title; + this.subtitle = subtitle; + this.originalTitle = originalTitle; + this.durationSeconds = durationSeconds; + this.shortDescription = shortDescription; + this.fullDescription = fullDescription; + this.headerText = headerText; + this.geoblockingZone = geoblockingZone; + this.website = url; + this.season = season; + this.episode = episode; + this.broadcastBegin = broadcastBegin; + this.broadcastBeginRounded = broadcastBeginRounded; + this.category = category; + this.categoryName = categoryName; + this.subcategoryName = subcategoryName; + this.creationDate = creationDate; + this.pageIndex = pageIndex; + } + + public Optional getFirstBroadcastDate() { + return firstBroadcastDate; + } + public Optional getId() { + return id; + } + public Optional getProgramId() { + return programId; + } + public Optional getChannel() { + return channel; + } + public Optional getLanguage() { + return language; + } + public Optional getKind() { + return kind; + } + public Optional getCatalogType() { + return catalogType; + } + public Optional getProgramType() { + return programType; + } + public Optional getPlatform() { + return platform; + } + public 
Optional getPlatformLabel() { + return platformLabel; + } + public Optional getTitle() { + return title; + } + public Optional getSubtitle() { + return subtitle; + } + public Optional getOriginalTitle() { + return originalTitle; + } + public Optional getDurationSeconds() { + return durationSeconds; + } + public Optional getShortDescription() { + return shortDescription; + } + public Optional getFullDescription() { + return fullDescription; + } + public Optional getHeaderText() { + return headerText; + } + public Optional getGeoblockingZone() { + return geoblockingZone; + } + public Optional getWebsite() { + return website; + } + public Optional getSeason() { + return season; + } + public Optional getEpisode() { + return episode; + } + public Optional getBroadcastBegin() { + return broadcastBegin; + } + public Optional getBroadcastBeginRounded() { + return broadcastBeginRounded; + } + public Optional getCategory() { + return category; + } + public Optional getCategoryName() { + return categoryName; + } + public Optional getSubcategoryName() { + return subcategoryName; + } + public Optional getCreationDate() { + return creationDate; + } + public Optional getPageIndex() { + return pageIndex; + } + + public List getVideoLinks() { + return videoLinks; + } + public void setVideoLinks(List input) { + videoLinks = input; + } + + public List getSubtitleLinks() { + return subtitleLinks; + } + public void setSubtitleLinks(List subtitleLinks) { + this.subtitleLinks = subtitleLinks; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) + return false; + ArteVideoInfoDto that = (ArteVideoInfoDto) o; + return Objects.equals(this.id.get(), that.id.get()); + } + + @Override + public int hashCode() { + return Objects.hash(id.get()); + } + + +} diff --git a/src/main/java/mServer/crawler/sender/arte/json/ArteVideoLinkDeserializer.java 
b/src/main/java/mServer/crawler/sender/arte/json/ArteVideoLinkDeserializer.java new file mode 100644 index 000000000..3cc247806 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/json/ArteVideoLinkDeserializer.java @@ -0,0 +1,62 @@ +package mServer.crawler.sender.arte.json; + +import java.lang.reflect.Type; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +import com.google.gson.JsonArray; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import mServer.crawler.sender.base.JsonUtils; + +public class ArteVideoLinkDeserializer implements JsonDeserializer> { + + private static final String TAG_VIDEO_STREAMS = "videoStreams"; + private static final String TAG_PROGRAM_ID = "programId"; + private static final String TAG_URL = "url"; + private static final String TAG_QUALITY = "quality"; + private static final String TAG_AUDIO_SLOT = "audioSlot"; + private static final String TAG_AUDIO_CODE = "audioCode"; + private static final String TAG_AUDIO_LABEL = "audioLabel"; + private static final String TAG_AUDIO_SHORT_LABEL = "audioShortLabel"; + private static final String TAG_WIDTH = "width"; + private static final String TAG_HEIGHT = "height"; + + + @Override + public List deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) + throws JsonParseException { + final List videoUrls = new ArrayList<>(); + final JsonObject searchElement = json.getAsJsonObject(); + final JsonArray itemArray = searchElement.getAsJsonArray(TAG_VIDEO_STREAMS); + for (JsonElement arrayElement : itemArray) { + videoUrls.add(parseVideoElement(arrayElement)); + } + return videoUrls; + } + + protected ArteVideoLinkDto parseVideoElement(final JsonElement arrayElement) { + return new ArteVideoLinkDto( + JsonUtils.getElementValueAsString(arrayElement, TAG_PROGRAM_ID), + 
fixMissingHttpsProtocol(JsonUtils.getElementValueAsString(arrayElement, TAG_URL)), + JsonUtils.getElementValueAsString(arrayElement, TAG_QUALITY), + JsonUtils.getElementValueAsString(arrayElement, TAG_AUDIO_SLOT), + JsonUtils.getElementValueAsString(arrayElement, TAG_AUDIO_CODE), + JsonUtils.getElementValueAsString(arrayElement, TAG_AUDIO_LABEL), + JsonUtils.getElementValueAsString(arrayElement, TAG_AUDIO_SHORT_LABEL), + JsonUtils.getElementValueAsString(arrayElement, TAG_WIDTH), + JsonUtils.getElementValueAsString(arrayElement, TAG_HEIGHT) + ); + } + + protected Optional fixMissingHttpsProtocol(Optional inputUrl) { + if (inputUrl.isEmpty() || inputUrl.get().startsWith("https:")) { + return inputUrl; + } + return Optional.of(inputUrl.get().replace("http:", "https:")); + } +} diff --git a/src/main/java/mServer/crawler/sender/arte/json/ArteVideoLinkDto.java b/src/main/java/mServer/crawler/sender/arte/json/ArteVideoLinkDto.java new file mode 100644 index 000000000..a5522b51a --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/json/ArteVideoLinkDto.java @@ -0,0 +1,60 @@ +package mServer.crawler.sender.arte.json; + +import java.util.Optional; + +public class ArteVideoLinkDto { + private Optional programId; + private Optional url; + private Optional quality; + private Optional audioSlot; + private Optional audioCode; + private Optional audioLabel; + private Optional audioShortLabel; + private Optional width; + private Optional height; + public ArteVideoLinkDto(Optional programId, Optional url, Optional quality, Optional audioSlot, Optional audioCode, + Optional audioLabel, Optional audioShortLabel, Optional width, Optional height) { + super(); + this.programId = programId; + this.url = url; + this.quality = quality; + this.audioSlot = audioSlot; + this.audioCode = audioCode; + this.audioLabel = audioLabel; + this.audioShortLabel = audioShortLabel; + this.width = width; + this.height = height; + } + public Optional getProgramId() { + return programId; + } + 
public Optional getUrl() { + return url; + } + public Optional getQuality() { + return quality; + } + public Optional getAudioSlot() { + return audioSlot; + } + public Optional getAudioCode() { + return audioCode; + } + public Optional getAudioLabel() { + return audioLabel; + } + public Optional getAudioShortLabel() { + return audioShortLabel; + } + public Optional getWidth() { + return width; + } + public Optional getHeight() { + return height; + } + + + + + +} diff --git a/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java b/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java new file mode 100644 index 000000000..22386e15b --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java @@ -0,0 +1,185 @@ +package mServer.crawler.sender.arte.tasks; + +import de.mediathekview.mlib.Config; +import de.mediathekview.mlib.daten.DatenFilm; +import mServer.crawler.CrawlerTool; +import mServer.crawler.sender.MediathekReader; +import mServer.crawler.sender.arte.ArteRestVideoTypeMapper; +import mServer.crawler.sender.arte.ArteVideoType; +import mServer.crawler.sender.arte.json.ArteVideoInfoDto; +import mServer.crawler.sender.base.AbstractRecursivConverterTask; +import mServer.crawler.sender.base.GeoLocations; +import mServer.crawler.sender.base.Qualities; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.time.Duration; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.util.EnumMap; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ConcurrentLinkedQueue; + +public class ArteDtoVideo2FilmTask extends AbstractRecursivConverterTask { + private static final long serialVersionUID = 1L; + private static final DateTimeFormatter DATE_FORMAT + = DateTimeFormatter.ofPattern("dd.MM.yyyy"); + private static final DateTimeFormatter 
TIME_FORMAT + = DateTimeFormatter.ofPattern("HH:mm:ss"); + private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssX"); + private static final ZoneId ZONE_ID = ZoneId.of("Europe/Berlin"); + protected final transient Logger log = LogManager.getLogger(this.getClass()); + private final String sender; + + public ArteDtoVideo2FilmTask(MediathekReader aCrawler, ConcurrentLinkedQueue aUrlToCrawlDTOs, String sender) { + super(aCrawler, aUrlToCrawlDTOs); + this.sender = sender; + } + + @Override + protected AbstractRecursivConverterTask createNewOwnInstance( + ConcurrentLinkedQueue aElementsToProcess) { + return new ArteDtoVideo2FilmTask(crawler, aElementsToProcess, sender); + } + + @Override + protected Integer getMaxElementsToProcess() { + return 50; + } + + @Override + protected void processElement(ArteVideoInfoDto aElement) { + if (Config.getStop()) { + return; + } + + parse(aElement); + } + + protected void parse(ArteVideoInfoDto aElement) { + Map videoUrls = buildVideoUrls(aElement, ArteVideoType.DEFAULT); + Map videoSubs = buildVideoUrls(aElement, ArteVideoType.AUDIO_DESCRIPTION); + if (!videoUrls.isEmpty()) { + addFilm(aElement, "", videoUrls); + } + if (!videoSubs.isEmpty()) { + addFilm(aElement, " (mit Untertitel)", videoSubs); + } + // + Map originalVersion = buildVideoUrls(aElement, ArteVideoType.ORIGINAL); + Map originalVersionSubs = buildVideoUrls(aElement, ArteVideoType.ORIGINAL_WITH_SUBTITLE); + if (!originalVersion.isEmpty()) { + addFilm(aElement, " (Originalversion)", originalVersion); + } + if (!originalVersionSubs.isEmpty()) { // es gibt nur FR und FR mit UT dann nehmen wir FR mit UT + addFilm(aElement, " (Originalversion mit Untertitel)", originalVersionSubs); + } + } + + protected void addFilm(ArteVideoInfoDto videoInfo, String titleSuffix, Map video) { + final LocalDateTime localDateTime = buildAired(videoInfo); + String date = localDateTime.format(DATE_FORMAT); + String time = 
localDateTime.format(TIME_FORMAT); + + DatenFilm film = new DatenFilm(sender, buildTopic(videoInfo), buildWebsite(videoInfo), buildTitle(videoInfo) + titleSuffix, + video.get(Qualities.NORMAL), "" /*urlRtmp*/, + date, time, buildDuration(videoInfo).getSeconds(), buildDescription(videoInfo)); + if (video.containsKey(Qualities.HD)) { + CrawlerTool.addUrlHd(film, video.get(Qualities.HD)); + } + if (video.containsKey(Qualities.SMALL)) { + CrawlerTool.addUrlKlein(film, video.get(Qualities.SMALL)); + } + + final GeoLocations geoLocations = buildGeoLocation(videoInfo); + if (geoLocations != GeoLocations.GEO_NONE) { + film.arr[DatenFilm.FILM_GEO] = geoLocations.getDescription(); + } + + if (!taskResults.add(film)) { + log.info("Duplicate {}", film); + } + } + + protected String buildTitle(ArteVideoInfoDto aElement) { + String title = aElement.getTitle().get(); + if (aElement.getSubtitle().isPresent()) { + title += " - " + aElement.getSubtitle().get(); + } + return title; + } + + protected String buildTopic(ArteVideoInfoDto aElement) { + String topic = aElement.getCategoryName().get(); + if (aElement.getSubcategoryName().isPresent()) { + topic += " - " + aElement.getSubcategoryName().get(); + } + return topic; + } + + protected LocalDateTime buildAired(ArteVideoInfoDto aElement) { + String value = aElement. 
+ getBroadcastBeginRounded().orElse( + aElement.getBroadcastBegin().orElse( + aElement.getFirstBroadcastDate().orElse( + aElement.getCreationDate().orElse("")))); + LocalDateTime local = LocalDateTime.parse(value, DATE_FORMATTER); + ZonedDateTime zoned = local.atZone(ZONE_ID); + int hoursToAdd = zoned.getOffset().getTotalSeconds() / 3600; + return local.plusHours(hoursToAdd); + } + + protected Duration buildDuration(ArteVideoInfoDto aElement) { + return Duration.ofSeconds(Integer.parseInt(aElement.getDurationSeconds().get())); + } + + protected GeoLocations buildGeoLocation(ArteVideoInfoDto aElement) { + GeoLocations geo = GeoLocations.GEO_NONE; + if (aElement.getGeoblockingZone().isPresent()) { + String code = aElement.getGeoblockingZone().get(); + switch (code) { + case "DE_FR": + geo = GeoLocations.GEO_DE_FR; + break; + case "EUR_DE_FR": + geo = GeoLocations.GEO_DE_AT_CH_FR; + break; + case "SAT": + geo = GeoLocations.GEO_DE_AT_CH_EU; + break; + case "ALL": + geo = GeoLocations.GEO_NONE; + break; + default: + log.debug("New ARTE GeoLocation: {}", code); + } + } + return geo; + } + + protected String buildDescription(ArteVideoInfoDto aElement) { + return aElement.getShortDescription().orElse(""); + } + + protected String buildWebsite(ArteVideoInfoDto aElement) { + if (aElement.getWebsite().isEmpty()) { + return ""; + } + return aElement.getWebsite().get(); + } + + protected Map buildVideoUrls(ArteVideoInfoDto aElement, ArteVideoType type) { + final Map urls = new EnumMap<>(Qualities.class); + aElement.getVideoLinks().forEach(entry -> { + Optional audioTypeCode = ArteRestVideoTypeMapper.map(sender, entry.getAudioCode().get()); + if (audioTypeCode.isPresent() && audioTypeCode.get().equals(type)) { + urls.put(ArteRestVideoTypeMapper.mapQuality(entry.getQuality().get()).get(), entry.getUrl().get()); + } + }); + return urls; + } + +} diff --git a/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoInfoTask.java 
b/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoInfoTask.java new file mode 100644 index 000000000..bcc05df10 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoInfoTask.java @@ -0,0 +1,97 @@ +package mServer.crawler.sender.arte.tasks; + +import java.lang.reflect.Type; +import java.net.URI; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentLinkedQueue; + +import de.mediathekview.mlib.tool.Log; +import mServer.crawler.CrawlerTool; +import mServer.crawler.sender.MediathekReader; +import mServer.crawler.sender.base.AbstractJsonRestTask; +import mServer.crawler.sender.base.AbstractRecursivConverterTask; +import mServer.crawler.sender.base.PagedElementListDTO; +import mServer.crawler.sender.base.TopicUrlDTO; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import com.google.gson.JsonDeserializer; +import com.google.gson.reflect.TypeToken; + +import mServer.crawler.sender.arte.ArteConstants; +import mServer.crawler.sender.arte.json.ArteVideoInfoDeserializer; +import mServer.crawler.sender.arte.json.ArteVideoInfoDto; +import jakarta.ws.rs.core.Response; + +public class ArteVideoInfoTask + extends AbstractJsonRestTask, TopicUrlDTO> { + private static final long serialVersionUID = 1L; + protected final transient Logger log = LogManager.getLogger(this.getClass()); + protected transient Optional> nextPageTask = Optional.empty(); + + + public ArteVideoInfoTask(MediathekReader crawler, ConcurrentLinkedQueue urlToCrawlDTOs) { + super(crawler, urlToCrawlDTOs, Optional.of(ArteConstants.API_TOKEN)); + } + + @Override + protected JsonDeserializer> getParser(TopicUrlDTO aDTO) { + return new ArteVideoInfoDeserializer(); + } + + @Override + protected Type getType() { + return new TypeToken>() {}.getType(); + } + + protected void postProcessingNextPage(PagedElementListDTO aResponseObj) { + if (aResponseObj.getNextPage().isEmpty()) { + return; + } + int maxPages = 
Math.min(100, getMaximumSubpages()); + if (aResponseObj.getNextPage().get().contains("age="+maxPages)) { + log.debug("stop at page url {} due to limit {}", aResponseObj.getNextPage().get(), maxPages); + return; + } + + final ConcurrentLinkedQueue nextPageLinks = new ConcurrentLinkedQueue<>(); + nextPageLinks.add(new TopicUrlDTO(aResponseObj.getNextPage().get(), aResponseObj.getNextPage().get())); + nextPageTask = Optional.of(createNewOwnInstance(nextPageLinks)); + nextPageTask.get().fork(); + } + + private int getMaximumSubpages() { + if (CrawlerTool.loadLongMax()) { + return 10; + } else { + return 3; + } + } + + protected void postProcessingElements(Set elements) { + for (ArteVideoInfoDto element : elements) { + taskResults.add(element); + } + } + + @Override + protected void postProcessing(PagedElementListDTO aResponseObj, TopicUrlDTO aDTO) { + postProcessingNextPage(aResponseObj); + postProcessingElements(aResponseObj.getElements()); + nextPageTask.ifPresent(paginationResults -> postProcessingElements(paginationResults.join())); + + } + + @Override + protected AbstractRecursivConverterTask createNewOwnInstance( + ConcurrentLinkedQueue aElementsToProcess) { + return new ArteVideoInfoTask(crawler, aElementsToProcess); + } + + @Override + protected void handleHttpError(TopicUrlDTO dto, URI url, Response response) { + Log.errorLog(45983789, "http error " + response.getStatus() + ": " + url); + log.fatal("A HTTP error {} occurred when getting REST VideoInfo information from: \"{}\".", response.getStatus(), url); + } +} diff --git a/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoLinkTask.java b/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoLinkTask.java new file mode 100644 index 000000000..e3f3b3732 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoLinkTask.java @@ -0,0 +1,65 @@ +package mServer.crawler.sender.arte.tasks; + +import java.lang.reflect.Type; +import java.net.URI; +import java.util.List; +import 
java.util.Optional; +import java.util.concurrent.ConcurrentLinkedQueue; + +import de.mediathekview.mlib.tool.Log; +import mServer.crawler.sender.MediathekReader; +import mServer.crawler.sender.base.AbstractJsonRestTask; +import mServer.crawler.sender.base.AbstractRecursivConverterTask; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import com.google.gson.JsonDeserializer; +import com.google.gson.reflect.TypeToken; + +import mServer.crawler.sender.arte.json.ArteVideoInfoDto; +import mServer.crawler.sender.arte.json.ArteVideoLinkDeserializer; +import mServer.crawler.sender.arte.json.ArteVideoLinkDto; +import jakarta.ws.rs.core.Response; + +public class ArteVideoLinkTask + extends AbstractJsonRestTask, ArteVideoInfoDto> { + private static final long serialVersionUID = 1L; + protected final transient Logger log = LogManager.getLogger(this.getClass()); + + + public ArteVideoLinkTask(MediathekReader crawler, ConcurrentLinkedQueue videoInfo) { + super(crawler, videoInfo, Optional.empty()); + } + + @Override + protected JsonDeserializer> getParser(ArteVideoInfoDto aDTO) { + return new ArteVideoLinkDeserializer(); + } + + @Override + protected Type getType() { + return new TypeToken>() {}.getType(); + } + + protected void postProcessingElements(List elements, ArteVideoInfoDto aDTO) { + aDTO.setVideoLinks(elements); + taskResults.add(aDTO); + } + + @Override + protected void postProcessing(List aResponseObj, ArteVideoInfoDto aDTO) { + postProcessingElements(aResponseObj, aDTO); + } + + @Override + protected AbstractRecursivConverterTask createNewOwnInstance( + ConcurrentLinkedQueue aElementsToProcess) { + return new ArteVideoLinkTask(crawler, aElementsToProcess); + } + + @Override + protected void handleHttpError(ArteVideoInfoDto dto, URI url, Response response) { + Log.errorLog(89723823, "http error: " + response.getStatus() + " " + url); + log.fatal("A HTTP error {} occurred when getting REST VideoLink information from: \"{}\".", 
response.getStatus(), url); + } +} diff --git a/src/main/java/mServer/crawler/sender/base/JsoupConnection.java b/src/main/java/mServer/crawler/sender/base/JsoupConnection.java index cad64c2f8..8ab4d1a82 100644 --- a/src/main/java/mServer/crawler/sender/base/JsoupConnection.java +++ b/src/main/java/mServer/crawler/sender/base/JsoupConnection.java @@ -1,17 +1,37 @@ package mServer.crawler.sender.base; +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import okhttp3.*; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.parser.Parser; import java.io.IOException; +import java.util.Map; +import java.util.concurrent.TimeUnit; /** * Helper Class to get rid of static method call for better testability */ public class JsoupConnection { + private final OkHttpClient client; + + @Deprecated + public JsoupConnection() { client = null; } + + public JsoupConnection(final int timeout, final int threadPoolSize) { + client = + new OkHttpClient.Builder() + .connectTimeout(timeout, TimeUnit.SECONDS) + .readTimeout(timeout, TimeUnit.SECONDS) + .callTimeout(timeout, TimeUnit.SECONDS) + .connectionPool(new ConnectionPool(threadPoolSize, 5L, TimeUnit.MINUTES)) + .build(); + } + public Connection getConnection(String url) { return Jsoup.connect(url); } @@ -28,4 +48,52 @@ public Document getDocumentTimeoutAfterAlternativeDocumentType(String url, int t return getConnection(url).timeout(timeoutInMilliseconds).parser(parser).get(); } + /** + * Request an url and receive the body as String. Add headers as a string map. 
+ * @param url The url to request. + * @param headerMap Header names mapped to their values; may be null to send no additional headers. + * @return The response body, or an empty string on HTTP 404/410, a missing body or three unsuccessful attempts. + * @throws IOException If executing the request or reading the body fails. + */ + public String requestBodyAsString(final String url, final Map headerMap) throws IOException { + int retry = 0; + int httpResponseCode; + final String responseString = ""; + do { + okhttp3.Headers.Builder headerBuilder = new Headers.Builder(); + if (headerMap != null) { + for (Map.Entry headerValue : headerMap.entrySet()) { + headerBuilder.add(headerValue.getKey(), headerValue.getValue()); + } + } + Request request = new Request.Builder() + .url(url) + .headers(headerBuilder.build()) + .build(); + + try (final Response response = client.newCall(request).execute()) { + httpResponseCode = response.code(); + if (response.body() == null || httpResponseCode == 404 || httpResponseCode == 410) { + break; + } + if (response.isSuccessful()) { + final ResponseBody responseBody = response.body(); + return responseBody == null ? "" : responseBody.string(); + } + } + retry++; + } while (retry < 3); + return responseString; + } + + /** + * Request an url and receive the body parsed as a Gson JsonElement + * + * @param url The url to request. + * @return request body as Gson JsonElement + * @throws IOException If no connection to the url could be opened. 
+ */ + public JsonElement requestBodyAsJsonElement(final String url, final Map headerMap) throws IOException { + return new Gson().fromJson(requestBodyAsString(url, headerMap), JsonElement.class); + } } diff --git a/src/test/developTest/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializerTest.java b/src/test/developTest/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializerTest.java deleted file mode 100644 index b545e407c..000000000 --- a/src/test/developTest/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializerTest.java +++ /dev/null @@ -1,55 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.gson.JsonObject; -import mServer.test.JsonFileReader; -import org.hamcrest.Matchers; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.util.Arrays; -import java.util.Collection; -import java.util.Set; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.notNullValue; -import static org.junit.Assert.assertThat; - -@RunWith(Parameterized.class) -public class ArteCategoryFilmListDeserializerTest { - - private final String jsonFile; - private final String[] expectedProgramIds; - private final boolean expectedHasNextPage; - private final String expectedNextPageUrl; - private final ArteCategoryFilmListDeserializer target; - public ArteCategoryFilmListDeserializerTest(String aJsonFile, String[] aProgramIds, boolean aNextPage, String nextPageUrl) { - jsonFile = aJsonFile; - expectedProgramIds = aProgramIds; - expectedHasNextPage = aNextPage; - expectedNextPageUrl = nextPageUrl; - this.target = new ArteCategoryFilmListDeserializer(); - } - - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][]{ - {"/arte/arte_category.json", new String[]{"112511-000-A", "047389-000-A", "109066-000-A", "082669-000-A", "003982-000-A", "021109-000-A"}, false, null}, - {"/arte/arte_video_list1.json", new 
String[]{"033559-000-A","078154-000-A", "101398-000-A", "109332-000-A", "111063-000-A"}, true, "https://www.arte.tv/api/rproxy/emac/v4/de/web/zones/daeadc71-4306-411a-8590-1c1f484ef5aa/content?abv=B&authorizedCountry=DE&page=2&pageId=MOST_RECENT&zoneIndexInPage=0"} - }); - } - - @Test - public void testDeserialize() { - - JsonObject jsonObject = JsonFileReader.readJson(jsonFile); - - ArteCategoryFilmsDTO actual = target.deserialize(jsonObject, ArteCategoryFilmsDTO.class, null); - - assertThat(actual, notNullValue()); - assertThat(actual.hasNextPage(), equalTo(expectedHasNextPage)); - Set actualProgramIds = actual.getProgramIds(); - assertThat(actualProgramIds, Matchers.containsInAnyOrder(expectedProgramIds)); - assertThat(actual.getNextPageUrl(), equalTo(expectedNextPageUrl)); - } -} diff --git a/src/test/developTest/java/mServer/crawler/sender/arte/ArteSubPageDeserializerTest.java b/src/test/developTest/java/mServer/crawler/sender/arte/ArteSubPageDeserializerTest.java deleted file mode 100644 index 90aa96086..000000000 --- a/src/test/developTest/java/mServer/crawler/sender/arte/ArteSubPageDeserializerTest.java +++ /dev/null @@ -1,55 +0,0 @@ -package mServer.crawler.sender.arte; - -import com.google.gson.JsonObject; -import mServer.test.JsonFileReader; -import org.hamcrest.Matchers; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.util.Arrays; -import java.util.Collection; -import java.util.Set; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.notNullValue; -import static org.junit.Assert.assertThat; - -@RunWith(Parameterized.class) -public class ArteSubPageDeserializerTest { - - private final String jsonFile; - private final String[] expectedProgramIds; - private final boolean expectedHasNextPage; - private final String expectedNextPageUrl; - private final ArteSubPageDeserializer target; - public ArteSubPageDeserializerTest(String aJsonFile, String[] 
aProgramIds, boolean aNextPage, String nextPageUrl) { - jsonFile = aJsonFile; - expectedProgramIds = aProgramIds; - expectedHasNextPage = aNextPage; - expectedNextPageUrl = nextPageUrl; - this.target = new ArteSubPageDeserializer(); - } - - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][]{ - {"/arte/arte_video_list2.json", new String[]{"099708-000-A", "098846-000-A", "111648-001-A", "112235-000-A", "113043-139-A"}, true, "https://www.arte.tv/api/rproxy/emac/v4/de/web/zones/82b597d7-a83b-4dd8-bea8-ad71675fdf23/content?abv=A&authorizedCountry=DE&page=3&pageId=MOST_VIEWED&zoneIndexInPage=0"}, - {"/arte/arte_video_list_last.json", new String[]{"102805-000-A","104017-000-A", "106273-006-A"}, false, null} - }); - } - - @Test - public void testDeserialize() { - - JsonObject jsonObject = JsonFileReader.readJson(jsonFile); - - ArteCategoryFilmsDTO actual = target.deserialize(jsonObject, ArteCategoryFilmsDTO.class, null); - - assertThat(actual, notNullValue()); - assertThat(actual.hasNextPage(), equalTo(expectedHasNextPage)); - Set actualProgramIds = actual.getProgramIds(); - assertThat(actualProgramIds, Matchers.containsInAnyOrder(expectedProgramIds)); - assertThat(actual.getNextPageUrl(), equalTo(expectedNextPageUrl)); - } -} diff --git a/src/test/developTest/java/mServer/crawler/sender/arte/ArteVideoDetailsDeserializerTest.java b/src/test/developTest/java/mServer/crawler/sender/arte/ArteVideoDetailsDeserializerTest.java deleted file mode 100644 index 6d92ad267..000000000 --- a/src/test/developTest/java/mServer/crawler/sender/arte/ArteVideoDetailsDeserializerTest.java +++ /dev/null @@ -1,74 +0,0 @@ -package mServer.crawler.sender.arte; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.notNullValue; -import static org.junit.Assert.assertThat; - -import java.util.Arrays; -import java.util.Calendar; -import java.util.Collection; - -import org.junit.Test; -import 
org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import com.google.gson.JsonObject; - -import mServer.crawler.sender.base.GeoLocations; -import mServer.test.JsonFileReader; - -@RunWith(Parameterized.class) -public class ArteVideoDetailsDeserializerTest { - - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { - { "/arte/arte_video_details_first_several_minors_geo_defr.json", "Kino - Filme", "Detective Dee und der Fluch des Seeungeheuers", "Raffiniertes Fantasy-Spektakel über die Lehrjahre eines chinesischen Sherlock Holmes: Das China der Tang-Dynastie wird nicht nur von ausländischen Feinden angegriffen, sondern auch von einem geheimnisvollen Seeungeheuer bedroht. Auf den Spuren einer großangelegten Intrige wird das Können des jungen Detektivs Dee (Mark Chao) auf eine harte Probe gestellt.", "http://www.arte.tv/de/videos/068339-000-A/detective-dee-und-der-fluch-des-seeungeheuers", "2017-07-10T18:15:00Z", GeoLocations.GEO_DE_FR }, - { "/arte/arte_video_details_several_majors_minors_geo_null.json", "Geschichte - Die Zeit vor dem 20. Jahrhundert", "Griff nach der Weltherrschaft - Ferdinand Magellan", "Ferdinand Magellan und Sir Francis Drake sind zwei faszinierende Entdeckergestalten der Weltgeschichte. Sie kämpften für ihre Nation um die Vorherrschaft in der \"Neuen Welt\". 
In dieser Folge: Magellan wollte den Spaniern helfen, die Kontrolle des Gewürzhandels zu übernehmen, indem er eine neue Route zu den Gewürzinseln suchte, die nicht durch portugiesische Gewässer führte.", "http://www.arte.tv/de/videos/041863-001-A/griff-nach-der-weltherrschaft", "2017-08-01T16:25:00Z", GeoLocations.GEO_NONE }, - { "/arte/arte_video_details_first_with_catchuprights_past_geo_sat.json", "Kino - Kurzfilme", "Paare - Friederike Kempter/Mehdi Nebou", "Die erfolgreiche Therapiestunde geht weiter: Paare bei ARTE auf der Couch.\nDiesmal deutsch-französische und internationale Stars, \ndie sich schonungslos vor der Kamera des Therapeuten offenbaren. In dieser Folge: Friederike Kempter/Mehdi Nebou", "http://www.arte.tv/de/videos/071363-001-A/paare", "2017-05-22T11:36:00Z", GeoLocations.GEO_DE_AT_CH_EU }, - { "/arte/arte_video_details_first_without_catchuprights_geo_eudefr.json", "Kultur und Pop - Popkultur", "British Style - Anglicism", "Wie kleidet sich Großbritannien? Mode-Kenner Loïc Prigent veranschaulicht in einer Dokumentation und sechs Kurzbeiträgen, was die Mode Großbritanniens so bunt, erstaunlich, manchmal auch vergnüglich und einzigartig macht. ARTE strahlt die kurzen Dokumentationen vom 16. Juli bis 20. August jeweils Sonntags in der Nacht aus.", "http://www.arte.tv/de/videos/074849-004-A/british-style", "2017-07-16T23:30:00Z", GeoLocations.GEO_DE_AT_CH_FR }, - { "/arte/arte_video_details_no_broadcastprogrammings_geo_all.json", "Aktuelles und Gesellschaft - Reportagen und Recherchen", "Syrien: Die Schlacht um Raqqa - ARTE Reportage", "Die kurdischen Kämpfer der Koalition kämpfen in Raqqa unter dem Banner der sogenannten Demokratischen Kräfte Syriens, einer Armee von 30 000 Soldatinnen und Soldaten, unterstützt von den Luftschlägen der Internationalen Koalition. Erster Etappensieg war die Rückeroberung des Staudamms von Tabqa, 40 Kilometer vor der Stadt. 
", "http://www.arte.tv/de/videos/076465-000-A/syrien-die-schlacht-um-raqqa", "2017-06-30T13:00:00Z", GeoLocations.GEO_NONE }, - { "/arte/arte_video_details_no_broadcastprogrammings_nocatchuprights.json", "Fernsehfilme und Serien - Serien", "Absolutely Fabulous 20-Jahre-Special: Identity", "Patsy hat ein Identitätsproblem. Nachdem sie jahrelang immer ihr wahres Alter verleugnet hat, fällt ihr nun nicht mehr ein wie alt sie eigentlich ist. Freundin Edina hilft ihr natürlich gerne bei der Suche nach der eigenen Identität. Deren Tochter Saffy, die aus dem Gefängnis entlassen wurde, hat derweil mit anderen Problemen zu kämpfen...", "http://www.arte.tv/de/videos/076845-002-A/absolutely-fabulous-20-jahre-special-identity", "2017-07-13T14:08:00Z", GeoLocations.GEO_NONE }, - { "/arte/arte_video_details_major_with_catchuprights_past.json", "Wissenschaft - Gesundheit und Medizin", "Alkoholsucht: Wundermittel Baclofen?", "Baclofen ist ein Medikament aus der Gruppe der Muskelrelaxantien. Könnte es auch ein Allheilmittel gegen Alkoholismus sein? Die Therapie ist ein Zufallsprodukt und in der Medizinwelt heftig umstritten. Zur Entstehungszeit der Doku war Baclofen noch nicht zugelassen. 
Ein Jahr lang hat ein Kamerateam Patienten und Ärzte während der ersten großen klinischen Studie begleitet.", "http://www.arte.tv/de/videos/047927-000-A/alkoholsucht-wundermittel-baclofen", "2017-05-20T20:40:00Z", GeoLocations.GEO_DE_AT_CH_FR }, - }); - } - - private final String jsonFile; - private final String expectedBroadcastBegin; - private final String expectedTheme; - private final String expectedTitle; - private final String expectedWebsite; - private final String expectedDescription; - private final GeoLocations geo; - - public ArteVideoDetailsDeserializerTest(String aJsonFile, String aTheme, String aTitle, String aDescription, String aWebsite, String aExpectedBroadcastBegin, GeoLocations aGeo) { - this.jsonFile = aJsonFile; - this.expectedBroadcastBegin = aExpectedBroadcastBegin; - this.expectedTheme = aTheme; - this.expectedTitle = aTitle; - this.expectedDescription = aDescription; - this.expectedWebsite = aWebsite; - this.geo = aGeo; - } - - @Test - public void testDeserialize() { - - JsonObject jsonObject = JsonFileReader.readJson(jsonFile); - - Calendar today = Calendar.getInstance(); - today.set(2017, 6, 11); // 11.07.2017 als heute verwenden - - ArteVideoDetailsDeserializer target = new ArteVideoDetailsDeserializer(today); - ArteVideoDetailsDTO actual = target.deserialize(jsonObject, ArteVideoDetailsDTO.class, null); - - assertThat(actual, notNullValue()); - assertThat(actual.getTheme(), equalTo(expectedTheme)); - assertThat(actual.getTitle(), equalTo(expectedTitle)); - assertThat(actual.getDescription(), equalTo(expectedDescription)); - assertThat(actual.getWebsite(), equalTo(expectedWebsite)); - assertThat(actual.getBroadcastBegin(), equalTo(expectedBroadcastBegin)); - assertThat(actual.getGeoLocation(), equalTo(geo)); - } - -} From 07a94562517d5ed8fa58f1d2124785073efe0306 Mon Sep 17 00:00:00 2001 From: pidoubleyou <22942659+pidoubleyou@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:16:13 +0100 Subject: [PATCH 2/5] merge develop changes --- 
.../crawler/sender/arte/ArteCrawler.java | 32 +++++++++++++------ .../sender/arte/json/ArteVideoInfoDto.java | 15 ++++++++- .../arte/tasks/ArteDtoVideo2FilmTask.java | 12 +++++-- 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java b/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java index 7e6f4ab10..c2c23ed53 100644 --- a/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java +++ b/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java @@ -1,7 +1,6 @@ package mServer.crawler.sender.arte; import com.google.gson.JsonElement; -import de.mediathekview.mlib.Config; import de.mediathekview.mlib.Const; import de.mediathekview.mlib.daten.DatenFilm; import de.mediathekview.mlib.tool.Log; @@ -46,9 +45,12 @@ protected ArteLanguage getLanguage() { protected RecursiveTask> createCrawlerTask() { try { + final ConcurrentLinkedQueue videoUrls = new ConcurrentLinkedQueue<>(); + videoUrls.addAll(createVideosQueue(getLanguage().toString().toLowerCase())); + final ArteVideoInfoTask aArteRestVideoInfoTask; // DO NOT overload - maximumUrlsPerTask used to reduce threads to 4 - aArteRestVideoInfoTask = new ArteVideoInfoTask(this, createVideosQueue()); + aArteRestVideoInfoTask = new ArteVideoInfoTask(this, videoUrls); final ConcurrentLinkedQueue videos = new ConcurrentLinkedQueue<>(); videos.addAll(aArteRestVideoInfoTask.fork().join()); // @@ -68,23 +70,33 @@ protected RecursiveTask> createCrawlerTask() { return null; } - private ConcurrentLinkedQueue createVideosQueue() { - int maxPages = getMaxPagesForOverview(); + private ConcurrentLinkedQueue createVideosQueue(String language) { + int maxPages = getMaxPagesForOverview(language); final ConcurrentLinkedQueue root = new ConcurrentLinkedQueue<>(); - String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, getLanguage().toString().toLowerCase()); + String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, language); root.add(new TopicUrlDTO("all 
videos1", rootUrl)); if (maxPages >= 100) { - String rootUrl2 = String.format(ArteConstants.VIDEOS_URL_ALT, 1, getLanguage().toString().toLowerCase()); + String rootUrl2 = String.format(ArteConstants.VIDEOS_URL_ALT, 1, language); root.add(new TopicUrlDTO("all videos2", rootUrl2)); } return root; } - private int getMaxPagesForOverview() { + private int getMaxPagesForOverview(String lang) { + final int maxAvailablePages = getNumberOfAvailablePages(lang); + final int configuredMaxPages = getMaximumSubpages(); + if (configuredMaxPages > maxAvailablePages) { + return Math.min(configuredMaxPages, maxAvailablePages / 2); + } else { + return Math.min(configuredMaxPages, configuredMaxPages / 2); + } + } + + private int getNumberOfAvailablePages(String lang) { final int naturalLimit = Math.min(100, getMaximumSubpages()); - String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, getLanguage().toString().toLowerCase()); - String[] path = {"meta", "videos", "pages"}; try { + String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, lang); + String[] path= {"meta", "videos", "pages"}; final Map headers = Map.of( "Accept", "application/json", "Content-Type", "application/json", @@ -93,7 +105,7 @@ private int getMaxPagesForOverview() { JsonElement element = jsoupConnection.requestBodyAsJsonElement(rootUrl, headers); Optional pages = JsonUtils.getElementValueAsInteger(element, path); if (pages.isPresent()) { - return Math.min(pages.get(), naturalLimit); + return pages.get(); } } catch (IOException e) { LOG.error("getMaxPagesForOverview", e); diff --git a/src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDto.java b/src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDto.java index 33f463369..7fa087ad5 100644 --- a/src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDto.java +++ b/src/main/java/mServer/crawler/sender/arte/json/ArteVideoInfoDto.java @@ -4,6 +4,7 @@ import java.util.Objects; import java.util.Optional; +import de.mediathekview.mlib.Const; 
import mServer.crawler.sender.arte.ArteConstants; import mServer.crawler.sender.base.CrawlerUrlDTO; @@ -179,7 +180,19 @@ public List getSubtitleLinks() { public void setSubtitleLinks(List subtitleLinks) { this.subtitleLinks = subtitleLinks; } - + + public String getSender() { + return switch (getLanguage().orElse("")) { + case "de" -> Const.ARTE_DE; + case "en" -> Const.ARTE_EN; + case "fr" -> Const.ARTE_FR; + case "es" -> Const.ARTE_ES; + case "it" -> Const.ARTE_IT; + case "pl" -> Const.ARTE_PL; + default -> Const.ARTE_DE; + }; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java b/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java index 22386e15b..6e961aed0 100644 --- a/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java +++ b/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java @@ -61,12 +61,12 @@ protected void processElement(ArteVideoInfoDto aElement) { protected void parse(ArteVideoInfoDto aElement) { Map videoUrls = buildVideoUrls(aElement, ArteVideoType.DEFAULT); - Map videoSubs = buildVideoUrls(aElement, ArteVideoType.AUDIO_DESCRIPTION); + Map videoAD = buildVideoUrls(aElement, ArteVideoType.AUDIO_DESCRIPTION); if (!videoUrls.isEmpty()) { addFilm(aElement, "", videoUrls); } - if (!videoSubs.isEmpty()) { - addFilm(aElement, " (mit Untertitel)", videoSubs); + if (!videoAD.isEmpty()) { + addFilm(aElement, " (Audiodeskription)", videoAD); } // Map originalVersion = buildVideoUrls(aElement, ArteVideoType.ORIGINAL); @@ -77,6 +77,12 @@ protected void parse(ArteVideoInfoDto aElement) { if (!originalVersionSubs.isEmpty()) { // es gibt nur FR und FR mit UT dann nehmen wir FR mit UT addFilm(aElement, " (Originalversion mit Untertitel)", originalVersionSubs); } + + // ARTE provides subs as a new video + Map videoSub = buildVideoUrls(aElement, ArteVideoType.SUBTITLE_INCLUDED); + if 
(!videoSub.isEmpty()) { + addFilm(aElement, " (mit Untertitel)", videoSub); + } } protected void addFilm(ArteVideoInfoDto videoInfo, String titleSuffix, Map video) { From 3f9b21f29461224f46d2fe52f2218bee8d9ab703 Mon Sep 17 00:00:00 2001 From: pidoubleyou <22942659+pidoubleyou@users.noreply.github.com> Date: Mon, 1 Dec 2025 22:26:47 +0100 Subject: [PATCH 3/5] add exception handling --- .../arte/tasks/ArteDtoVideo2FilmTask.java | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java b/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java index 6e961aed0..d79dac2ae 100644 --- a/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java +++ b/src/main/java/mServer/crawler/sender/arte/tasks/ArteDtoVideo2FilmTask.java @@ -2,7 +2,10 @@ import de.mediathekview.mlib.Config; import de.mediathekview.mlib.daten.DatenFilm; +import de.mediathekview.mlib.tool.Log; import mServer.crawler.CrawlerTool; +import mServer.crawler.FilmeSuchen; +import mServer.crawler.RunSender; import mServer.crawler.sender.MediathekReader; import mServer.crawler.sender.arte.ArteRestVideoTypeMapper; import mServer.crawler.sender.arte.ArteVideoType; @@ -90,23 +93,29 @@ protected void addFilm(ArteVideoInfoDto videoInfo, String titleSuffix, Map Date: Mon, 1 Dec 2025 22:52:58 +0100 Subject: [PATCH 4/5] add rate limiter --- .../tasks/ArteRateLimitedJsonRestTask.java | 25 +++++++++++++++++++ .../sender/arte/tasks/ArteVideoInfoTask.java | 2 +- .../sender/arte/tasks/ArteVideoLinkTask.java | 2 +- 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java diff --git a/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java b/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java new file mode 100644 index 000000000..22c18d88c --- 
/dev/null +++ b/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java @@ -0,0 +1,25 @@ +package mServer.crawler.sender.arte.tasks; + +import com.google.common.util.concurrent.RateLimiter; +import jakarta.ws.rs.client.WebTarget; +import mServer.crawler.sender.MediathekReader; +import mServer.crawler.sender.base.AbstractJsonRestTask; +import mServer.crawler.sender.base.CrawlerUrlDTO; + +import java.util.Optional; +import java.util.concurrent.ConcurrentLinkedQueue; + +public abstract class ArteRateLimitedJsonRestTask extends AbstractJsonRestTask { + private static final long serialVersionUID = 1L; + private static final RateLimiter rateLimiter = RateLimiter.create(10.0); + + protected ArteRateLimitedJsonRestTask(MediathekReader aCrawler, ConcurrentLinkedQueue urlToCrawlDTOs, Optional authKey) { + super(aCrawler, urlToCrawlDTOs, authKey); + } + + @Override + protected void processRestTarget(final D aDTO, final WebTarget aTarget) { + rateLimiter.acquire(); + super.processRestTarget(aDTO, aTarget); + } +} \ No newline at end of file diff --git a/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoInfoTask.java b/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoInfoTask.java index bcc05df10..aaafbd5b3 100644 --- a/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoInfoTask.java +++ b/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoInfoTask.java @@ -25,7 +25,7 @@ import jakarta.ws.rs.core.Response; public class ArteVideoInfoTask - extends AbstractJsonRestTask, TopicUrlDTO> { + extends ArteRateLimitedJsonRestTask, TopicUrlDTO> { private static final long serialVersionUID = 1L; protected final transient Logger log = LogManager.getLogger(this.getClass()); protected transient Optional> nextPageTask = Optional.empty(); diff --git a/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoLinkTask.java b/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoLinkTask.java index e3f3b3732..2100e5fb1 100644 --- 
a/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoLinkTask.java +++ b/src/main/java/mServer/crawler/sender/arte/tasks/ArteVideoLinkTask.java @@ -22,7 +22,7 @@ import jakarta.ws.rs.core.Response; public class ArteVideoLinkTask - extends AbstractJsonRestTask, ArteVideoInfoDto> { + extends ArteRateLimitedJsonRestTask, ArteVideoInfoDto> { private static final long serialVersionUID = 1L; protected final transient Logger log = LogManager.getLogger(this.getClass()); From a4c792b8b72871631acfc4283d82e68a7bd32a2c Mon Sep 17 00:00:00 2001 From: pidoubleyou <22942659+pidoubleyou@users.noreply.github.com> Date: Tue, 9 Dec 2025 22:52:46 +0100 Subject: [PATCH 5/5] reduce rate --- .../crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java b/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java index 22c18d88c..35ea04356 100644 --- a/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java +++ b/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java @@ -11,7 +11,7 @@ public abstract class ArteRateLimitedJsonRestTask extends AbstractJsonRestTask { private static final long serialVersionUID = 1L; - private static final RateLimiter rateLimiter = RateLimiter.create(10.0); + private static final RateLimiter rateLimiter = RateLimiter.create(6.0); protected ArteRateLimitedJsonRestTask(MediathekReader aCrawler, ConcurrentLinkedQueue urlToCrawlDTOs, Optional authKey) { super(aCrawler, urlToCrawlDTOs, authKey);