Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/main/java/de/mediathekview/mlib/Const.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ public class Const {
public static final String ARD = "ARD";
public static final String ARD_ALPHA = "ARD-alpha";
public static final String ARTE_DE = "ARTE.DE";
public static final String ARTE_EN = "ARTE.EN";
public static final String ARTE_ES = "ARTE.ES";
public static final String ARTE_FR = "ARTE.FR";
public static final String ARTE_IT = "ARTE.IT";
public static final String ARTE_PL = "ARTE.PL";
public static final String BR = "BR";
public static final String DW = "DW";
public static final String HR = "HR";
Expand Down
13 changes: 9 additions & 4 deletions src/main/java/mServer/crawler/FilmeSuchen.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import de.mediathekview.mlib.tool.Log;
import mServer.crawler.sender.*;
import mServer.crawler.sender.ard.ArdCrawler;
import mServer.crawler.sender.arte.MediathekArte;
import mServer.crawler.sender.arte.*;
import mServer.crawler.sender.dreisat.DreiSatCrawler;
import mServer.crawler.sender.dw.DwCrawler;
import mServer.crawler.sender.kika.KikaApiCrawler;
Expand Down Expand Up @@ -88,7 +88,12 @@ public FilmeSuchen() {
mediathekListe.add(new ZdfCrawler(this, 0));
}
if (crawlerList.contains("ARTE")) {
mediathekListe.add(new MediathekArte(this, 0));
mediathekListe.add(new ArteCrawler(this, 0));
mediathekListe.add(new ArteCrawler_FR(this, 0));
mediathekListe.add(new ArteCrawler_EN(this, 1));
mediathekListe.add(new ArteCrawler_ES(this, 1));
mediathekListe.add(new ArteCrawler_PL(this, 1));
mediathekListe.add(new ArteCrawler_IT(this, 1));
}
if (crawlerList.contains("DW")) {
mediathekListe.add(new DwCrawler(this, 0));
Expand All @@ -97,7 +102,7 @@ public FilmeSuchen() {
mediathekListe.add(new KikaApiCrawler(this, 0));
}
if (crawlerList.contains("3SAT")) {
mediathekListe.add(new DreiSatCrawler(this, 1));
mediathekListe.add(new DreiSatCrawler(this, 0));
}
if (crawlerList.contains("SR")) {
mediathekListe.add(new SrCrawler(this, 1));
Expand All @@ -112,7 +117,7 @@ public FilmeSuchen() {
mediathekListe.add(new OrfOnCrawler(this, 1));
}
if (crawlerList.contains("PHONIX")) {
mediathekListe.add(new PhoenixCrawler(this, 1));
mediathekListe.add(new PhoenixCrawler(this, 0));
}

}
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

10 changes: 10 additions & 0 deletions src/main/java/mServer/crawler/sender/arte/ArteConstants.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package mServer.crawler.sender.arte;

Check warning on line 1 in src/main/java/mServer/crawler/sender/arte/ArteConstants.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Rename this package name to match the regular expression '^[a-z_]+(\.[a-z_][a-z0-9_]*)*$'.

See more on https://sonarcloud.io/project/issues?id=mediathekview_MServer&issues=AZpQtKFUphXyWXQSWIE3&open=AZpQtKFUphXyWXQSWIE3&pullRequest=1098

/**
 * URL templates and the authorization value used by the ARTE crawler.
 *
 * <p>All URL constants are {@link String#format(String, Object...)} templates.
 * This class only holds constants and cannot be instantiated.
 */
public class ArteConstants {

  /** Shared beginning of both overview templates, up to and including {@code sort=}. */
  private static final String VIDEOS_URL_PREFIX =
      "https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=";

  /**
   * Overview of videos sorted by {@code -creationDate}.
   * Placeholders in order: page number, language.
   */
  public static final String VIDEOS_URL = VIDEOS_URL_PREFIX + "-creationDate&language=%s";

  /**
   * Overview of videos sorted by {@code creationDate} (the opposite sort key of
   * {@link #VIDEOS_URL}). Placeholders in order: page number, language.
   */
  public static final String VIDEOS_URL_ALT = VIDEOS_URL_PREFIX + "creationDate&language=%s";

  /** Stream details of a single video. Placeholders in order: PROGRAMID, KIND, LANG. */
  public static final String VIDEO_URL =
      "https://www.arte.tv/hbbtvv2/services/web/index.php/OPA/v3/streams/%s/%s/%s";

  /**
   * Value sent in the {@code Authorization} request header.
   * NOTE(review): this is a credential committed to source control - confirm that is intended.
   */
  public static final String API_TOKEN =
      "Bearer Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA";

  private ArteConstants() {
    // constants holder - no instances
  }
}
125 changes: 125 additions & 0 deletions src/main/java/mServer/crawler/sender/arte/ArteCrawler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package mServer.crawler.sender.arte;

Check warning on line 1 in src/main/java/mServer/crawler/sender/arte/ArteCrawler.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Rename this package name to match the regular expression '^[a-z_]+(\.[a-z_][a-z0-9_]*)*$'.

See more on https://sonarcloud.io/project/issues?id=mediathekview_MServer&issues=AZpQtKInphXyWXQSWIFp&open=AZpQtKInphXyWXQSWIFp&pullRequest=1098

import com.google.gson.JsonElement;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.sender.MediathekCrawler;
import mServer.crawler.sender.arte.json.ArteVideoInfoDto;
import mServer.crawler.sender.arte.tasks.ArteDtoVideo2FilmTask;
import mServer.crawler.sender.arte.tasks.ArteVideoInfoTask;
import mServer.crawler.sender.arte.tasks.ArteVideoLinkTask;
import mServer.crawler.sender.base.JsonUtils;
import mServer.crawler.sender.base.JsoupConnection;
import mServer.crawler.sender.base.TopicUrlDTO;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.IOException;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.RecursiveTask;

/**
 * Crawler for ARTE videos. This base class crawls the language returned by
 * {@link #getLanguage()} (German by default); subclasses override that method and pass
 * their own sender name to the protected constructor.
 */
public class ArteCrawler extends MediathekCrawler {
  private static final Logger LOG = LogManager.getLogger(ArteCrawler.class);
  private final JsoupConnection jsoupConnection;

  /**
   * Creates the crawler for the {@link Const#ARTE_DE} sender.
   *
   * @param ssearch passed through to {@link MediathekCrawler}
   * @param startPrio passed through to {@link MediathekCrawler}
   */
  public ArteCrawler(FilmeSuchen ssearch, int startPrio) {
    this(ssearch, startPrio, Const.ARTE_DE);
  }

  /**
   * Creates the crawler for an arbitrary sender name; intended for the language subclasses.
   *
   * @param ssearch passed through to {@link MediathekCrawler}
   * @param startPrio passed through to {@link MediathekCrawler}
   * @param sender sender name this crawler is registered under
   */
  protected ArteCrawler(FilmeSuchen ssearch, int startPrio, String sender) {
    super(ssearch, sender, /* threads */ 1, /* urlWarten */ 200, startPrio);
    this.jsoupConnection = new JsoupConnection(60, 4);
  }

  /**
   * Returns the language to crawl. Its lower-cased name is used as the {@code language}
   * parameter of the overview URLs.
   *
   * @return {@link ArteLanguage#DE} unless overridden
   */
  protected ArteLanguage getLanguage() {
    return ArteLanguage.DE;
  }

  /**
   * Runs the video-info and video-link stages synchronously and returns the task that
   * converts the resulting DTOs into films.
   *
   * @return the conversion task, or {@code null} if any exception occurred while preparing it
   *     (the exception is logged)
   */
  @Override
  protected RecursiveTask<Set<DatenFilm>> createCrawlerTask() {
    try {
      // Locale.ROOT keeps the API parameter stable: with the default locale a Turkish
      // system would lower-case "IT" to "ıt" (dotless i).
      final String language = getLanguage().toString().toLowerCase(java.util.Locale.ROOT);

      final ConcurrentLinkedQueue<TopicUrlDTO> videoUrls = new ConcurrentLinkedQueue<>();
      videoUrls.addAll(createVideosQueue(language));

      // DO NOT overload - maximumUrlsPerTask used to reduce threads to 4
      final ArteVideoInfoTask aArteRestVideoInfoTask = new ArteVideoInfoTask(this, videoUrls);
      final ConcurrentLinkedQueue<ArteVideoInfoDto> videos = new ConcurrentLinkedQueue<>();
      videos.addAll(aArteRestVideoInfoTask.fork().join());

      Log.sysLog(getSendername() + " Anzahl video info: " + videos.size());

      final ConcurrentLinkedQueue<ArteVideoInfoDto> videosWithLink = new ConcurrentLinkedQueue<>();
      final ArteVideoLinkTask aArteRestVideosTask = new ArteVideoLinkTask(this, videos);
      videosWithLink.addAll(aArteRestVideosTask.fork().join());

      Log.sysLog(getSendername() + " Anzahl video links: " + videosWithLink.size());

      return new ArteDtoVideo2FilmTask(
          this, new ConcurrentLinkedQueue<>(videosWithLink), getSendername());
    } catch (final Exception ex) {
      LOG.fatal("Exception in {} crawler.", getSendername(), ex);
    }
    // NOTE(review): callers receive null on failure - confirm the base class tolerates that.
    return null;
  }

  /**
   * Builds the queue of overview start URLs (page 1) for the given language.
   *
   * @param language lower-case language code inserted into the URL templates
   * @return queue holding the {@link ArteConstants#VIDEOS_URL} entry and, when the page budget
   *     is at least 100, additionally the {@link ArteConstants#VIDEOS_URL_ALT} entry
   */
  private ConcurrentLinkedQueue<TopicUrlDTO> createVideosQueue(String language) {
    final int maxPages = getMaxPagesForOverview(language);
    final ConcurrentLinkedQueue<TopicUrlDTO> root = new ConcurrentLinkedQueue<>();
    final String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, language);
    root.add(new TopicUrlDTO("all videos1", rootUrl));
    // NOTE(review): getMaxPagesForOverview currently yields at most 5 (half of the largest
    // configured value, 10), so this branch is not reached - confirm the intended threshold.
    if (maxPages >= 100) {
      final String rootUrl2 = String.format(ArteConstants.VIDEOS_URL_ALT, 1, language);
      root.add(new TopicUrlDTO("all videos2", rootUrl2));
    }
    return root;
  }

  /**
   * Computes the page budget for the overview crawl.
   *
   * @param lang lower-case language code used to query the number of available pages
   * @return half of the available pages when fewer pages are available than configured,
   *     otherwise half of the configured maximum; never more than the configured maximum
   */
  private int getMaxPagesForOverview(String lang) {
    final int maxAvailablePages = getNumberOfAvailablePages(lang);
    final int configuredMaxPages = getMaximumSubpages();
    if (configuredMaxPages > maxAvailablePages) {
      return Math.min(configuredMaxPages, maxAvailablePages / 2);
    } else {
      return Math.min(configuredMaxPages, configuredMaxPages / 2);
    }
  }

  /**
   * Requests page 1 of the overview and reads the page count from {@code meta.videos.pages}.
   *
   * @param lang lower-case language code inserted into the overview URL
   * @return the reported page count, or {@code min(100, getMaximumSubpages())} when the value
   *     is missing or the request fails with an {@link IOException}
   */
  private int getNumberOfAvailablePages(String lang) {
    final int naturalLimit = Math.min(100, getMaximumSubpages());
    try {
      final String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, lang);
      final String[] path = {"meta", "videos", "pages"};
      final Map<String, String> headers = Map.of(
          "Accept", "application/json",
          "Content-Type", "application/json",
          "Authorization", ArteConstants.API_TOKEN);
      final JsonElement element = jsoupConnection.requestBodyAsJsonElement(rootUrl, headers);
      final Optional<Integer> pages = JsonUtils.getElementValueAsInteger(element, path);
      return pages.orElse(naturalLimit);
    } catch (IOException e) {
      // Best effort: fall back to the natural limit instead of aborting the crawl.
      LOG.error("getNumberOfAvailablePages failed for language {}", lang, e);
    }
    return naturalLimit;
  }

  /**
   * Returns the configured number of overview sub pages.
   *
   * @return 10 when {@link CrawlerTool#loadLongMax()} is true, otherwise 1
   */
  private int getMaximumSubpages() {
    if (CrawlerTool.loadLongMax()) {
      return 10;
    } else {
      return 1;
    }
  }
}


17 changes: 17 additions & 0 deletions src/main/java/mServer/crawler/sender/arte/ArteCrawler_EN.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package mServer.crawler.sender.arte;

Check warning on line 1 in src/main/java/mServer/crawler/sender/arte/ArteCrawler_EN.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Rename this package name to match the regular expression '^[a-z_]+(\.[a-z_][a-z0-9_]*)*$'.

See more on https://sonarcloud.io/project/issues?id=mediathekview_MServer&issues=AZpQtKIephXyWXQSWIFo&open=AZpQtKIephXyWXQSWIFo&pullRequest=1098

import de.mediathekview.mlib.Const;
import mServer.crawler.FilmeSuchen;

public class ArteCrawler_EN extends ArteCrawler {

Check warning on line 6 in src/main/java/mServer/crawler/sender/arte/ArteCrawler_EN.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Rename this class name to match the regular expression '^[A-Z][a-zA-Z0-9]*$'.

See more on https://sonarcloud.io/project/issues?id=mediathekview_MServer&issues=AZpQtKIephXyWXQSWIFn&open=AZpQtKIephXyWXQSWIFn&pullRequest=1098

/**
 * Creates the crawler registered under the {@link Const#ARTE_EN} sender name.
 *
 * @param ssearch passed through to the {@link ArteCrawler} constructor
 * @param startPrio passed through to the {@link ArteCrawler} constructor
 */
public ArteCrawler_EN(FilmeSuchen ssearch, int startPrio) {
super(ssearch, startPrio, Const.ARTE_EN);
}

/**
 * Selects the language this crawler requests.
 *
 * @return always {@link ArteLanguage#EN}
 */
@Override
protected ArteLanguage getLanguage() {
return ArteLanguage.EN;
}

}
17 changes: 17 additions & 0 deletions src/main/java/mServer/crawler/sender/arte/ArteCrawler_ES.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package mServer.crawler.sender.arte;

Check warning on line 1 in src/main/java/mServer/crawler/sender/arte/ArteCrawler_ES.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Rename this package name to match the regular expression '^[a-z_]+(\.[a-z_][a-z0-9_]*)*$'.

See more on https://sonarcloud.io/project/issues?id=mediathekview_MServer&issues=AZpQtKICphXyWXQSWIFa&open=AZpQtKICphXyWXQSWIFa&pullRequest=1098

import de.mediathekview.mlib.Const;
import mServer.crawler.FilmeSuchen;

public class ArteCrawler_ES extends ArteCrawler {

Check warning on line 6 in src/main/java/mServer/crawler/sender/arte/ArteCrawler_ES.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Rename this class name to match the regular expression '^[A-Z][a-zA-Z0-9]*$'.

See more on https://sonarcloud.io/project/issues?id=mediathekview_MServer&issues=AZpQtKICphXyWXQSWIFZ&open=AZpQtKICphXyWXQSWIFZ&pullRequest=1098

/**
 * Creates the crawler registered under the {@link Const#ARTE_ES} sender name.
 *
 * @param ssearch passed through to the {@link ArteCrawler} constructor
 * @param startPrio passed through to the {@link ArteCrawler} constructor
 */
public ArteCrawler_ES(FilmeSuchen ssearch, int startPrio) {
super(ssearch, startPrio, Const.ARTE_ES);
}

/**
 * Selects the language this crawler requests.
 *
 * @return always {@link ArteLanguage#ES}
 */
@Override
protected ArteLanguage getLanguage() {
return ArteLanguage.ES;
}

}
Loading