Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ public final class YoutubeParsingHelper {
// Private constructor: this final class is not meant to be instantiated from outside.
private YoutubeParsingHelper() {
}

/**
 * The base URL of the regular YouTube website, including the trailing slash.
 */
public static final String YOUTUBE_BASE = "https://www.youtube.com/";

/**
* The base URL of requests of the {@code WEB} clients to the InnerTube internal API.
*/
Expand Down Expand Up @@ -212,6 +217,11 @@ private YoutubeParsingHelper() {
private static final String CONTENT_PLAYBACK_NONCE_ALPHABET =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";

/**
 * Regex for extracting any JSON array.
 *
 * <p>
 * The match starts at an opening square bracket and, because {@code .*} is greedy, extends
 * to the last closing square bracket that can be reached.
 * NOTE(review): {@code .} does not match line terminators unless the pattern is compiled
 * with {@code DOTALL}; confirm how the consumer of this constant compiles it.
 * </p>
 */
private static final String JSON_ARRAY = "\\[.*\\]";

/**
* The device machine id for the iPhone 15 Pro Max,
* used to get 60fps with the {@code iOS} client.
Expand Down Expand Up @@ -322,6 +332,35 @@ public static String randomVisitorData(final ContentCountry country) {
return pb.toUrlencodedBase64();
}

/**
 * Requests the {@code sw.js_data} YouTube endpoint and extracts the visitor data from it.
 *
 * <p>
 * The visitor data is not parsed into a programmatic form: the encoded string is returned
 * as-is, which is useful for passing it into API requests which require {@code visitorData}
 * to work.
 * </p>
 *
 * <p>
 * The extraction logic is very brittle, as it relies on fixed positions inside nested JSON
 * arrays, and is likely to fail with future changes of the response.
 * </p>
 *
 * @return the extracted encoded visitor data string, never {@code null} nor empty
 * @throws ParsingException if the format of data is no longer a JSON array, or if no visitor
 *                          data could be found at the expected position
 * @throws IOException when it cannot fetch the API data
 * @throws ReCaptchaException when it cannot fetch the API data
 */
public static String extractVisitorData()
        throws ParsingException, IOException, ReCaptchaException {
    final String url = YOUTUBE_BASE + "sw.js_data";
    final var headers = getOriginReferrerHeaders(YOUTUBE_BASE);
    final String response = getDownloader().get(url, headers).responseBody();
    final JsonArray jsonArray = JsonUtils.toJsonArray(
            Parser.matchGroup(JSON_ARRAY, response, 0));
    // Got this particular extraction logic by finding where the visitor data
    // lives through comparison. If the structure changes this is likely to fail.
    final String visitorData = jsonArray
            .getArray(0)
            .getArray(2)
            .getArray(0)
            .getArray(0)
            .getString(13);
    // Fail loudly when the lookup yielded nothing (e.g. the response layout changed), so that
    // callers catching ParsingException can fall back to another source of visitor data
    // instead of silently sending a missing value.
    if (visitorData == null || visitorData.isEmpty()) {
        throw new ParsingException(
                "Could not extract visitor data from the sw.js_data response");
    }
    return visitorData;
}

/**
* Parses the duration string of the video expecting ":" or "." as separators
*
Expand Down Expand Up @@ -1264,6 +1303,16 @@ public static JsonBuilder<JsonObject> prepareAndroidMobileJsonBuilder(
public static JsonBuilder<JsonObject> prepareIosMobileJsonBuilder(
@Nonnull final Localization localization,
@Nonnull final ContentCountry contentCountry) {

// Try to extract the visitor data from the sw.js_data API, but otherwise
// fall back to randomly generating the visitor data.
String visitorData = null;
try {
visitorData = extractVisitorData();
} catch (ParsingException | IOException | ReCaptchaException e) {
visitorData = randomVisitorData(contentCountry);
}

// @formatter:off
return JsonObject.builder()
.object("context")
Expand All @@ -1276,7 +1325,7 @@ public static JsonBuilder<JsonObject> prepareIosMobileJsonBuilder(
.value("platform", "MOBILE")
.value("osName", "iOS")
.value("osVersion", IOS_OS_VERSION)
.value("visitorData", randomVisitorData(contentCountry))
.value("visitorData", visitorData)
.value("hl", localization.getLocalizationCode())
.value("gl", contentCountry.getCountryCode())
.value("utcOffsetMinutes", 0)
Expand Down
Loading