0
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2024-11-23 07:37:35 +00:00
RSS-Bridge_rss-bridge/bridges/YouTubeCommunityTabBridge.php
Thomas adad9d6405
[YouTubeCommunityTabBridge] Improve JSON extraction (#4140)
Small change that should make the extraction of JSON from HTML work more
reliably
2024-06-24 22:32:03 +02:00

331 lines
10 KiB
PHP

<?php
/**
 * Bridge that builds a feed from the posts on a YouTube channel's community tab.
 *
 * The channel is selected either by channel ID or by username (see PARAMETERS).
 */
class YouTubeCommunityTabBridge extends BridgeAbstract
{
const NAME = 'YouTube Community Tab Bridge';
// Base URL used when building community tab and post links.
const URI = 'https://www.youtube.com';
const DESCRIPTION = 'Returns posts from a channel\'s community tab';
const MAINTAINER = 'VerifiedJoseph';
// Input contexts. The context names and the 'channel' / 'username' keys are
// the ones detectParameters() fills in and collectData() reads via getInput().
const PARAMETERS = [
'By channel ID' => [
'channel' => [
'name' => 'Channel ID',
'type' => 'text',
'required' => true,
'exampleValue' => 'UCULkRHBdLC5ZcEQBaL0oYHQ'
]
],
'By username' => [
'username' => [
'name' => 'Username',
'type' => 'text',
'required' => true,
'exampleValue' => 'YouTubeUK'
],
]
];
const CACHE_TIMEOUT = 3600; // 1 hour
// URL of the community tab being read; set by collectData() and returned by getURI().
private $feedUrl = '';
// Channel title taken from the page data; used by getName() and in generated item titles.
private $feedName = '';
// Title of the item currently being built; reset for every post in collectData().
private $itemTitle = '';
// Matches community tab URLs. Channel IDs may contain hyphens, which \w alone
// does not cover, so the hyphen is part of the identifier character class.
private $urlRegex = '/youtube\.com\/(channel|user|c)\/([\w-]+)\/community/';
// Captures the JSON assigned to ytInitialData inside an inline script element.
private $jsonRegex = '/var ytInitialData = ([^<]*);<\/script>/';
/**
 * Derive the bridge parameters from a community tab URL.
 *
 * @param string $url URL to inspect.
 * @return array|null Context name plus the matching input value, or null
 *                    when the URL is not a recognised community tab URL.
 */
public function detectParameters($url)
{
    if (!preg_match($this->urlRegex, $url, $matches)) {
        return null;
    }

    $type = $matches[1];
    $identifier = $matches[2];

    if ($type === 'channel') {
        return [
            'context' => 'By channel ID',
            'channel' => $identifier,
        ];
    }

    // collectData() resolves a username through /c/ first and /user/ second,
    // so both URL forms belong to the username context. Previously /c/ URLs
    // matched the pattern but produced an empty parameter list.
    if ($type === 'user' || $type === 'c') {
        return [
            'context' => 'By username',
            'username' => $identifier,
        ];
    }

    return null;
}
/**
 * Load the channel's community tab and turn every post into a feed item.
 *
 * For the username input the /c/ URL is tried first; when loading it throws,
 * the /user/ URL is used instead. Each item receives a uri, author, content
 * and title, plus a timestamp when the published text can be parsed by
 * strtotime().
 */
public function collectData()
{
    $username = $this->getInput('username');

    if ($username !== null) {
        try {
            $this->feedUrl = $this->buildCommunityUri($username, 'c');
            $html = getSimpleHTMLDOM($this->feedUrl);
        } catch (Exception $e) {
            $this->feedUrl = $this->buildCommunityUri($username, 'user');
            $html = getSimpleHTMLDOM($this->feedUrl);
        }
    } else {
        $this->feedUrl = $this->buildCommunityUri($this->getInput('channel'), 'channel');
        $html = getSimpleHTMLDOM($this->feedUrl);
    }

    $json = $this->extractJson($html->find('html', 0)->innertext);

    // Defensive: an absent header title must not raise a warning. An empty
    // name makes getName() fall back to the parent implementation.
    $this->feedName = $json->header->c4TabbedHeaderRenderer->title ?? '';

    if ($this->hasCommunityTab($json) === false) {
        returnServerError('Channel does not have a community tab');
    }

    // Guard against a missing post list so the loop never iterates over null.
    $posts = $this->getCommunityPosts($json) ?? [];

    foreach ($posts as $key => $post) {
        $this->itemTitle = '';

        $thread = $post->backstagePostThreadRenderer ?? null;
        if ($thread === null) {
            continue;
        }

        if (isset($thread->post->backstagePostRenderer)) {
            $details = $thread->post->backstagePostRenderer;
        } elseif (isset($thread->post->sharedPostRenderer)) {
            // todo: properly extract data from this shared post
            $details = $thread->post->sharedPostRenderer;
        } else {
            continue;
        }

        $item = [];
        $item['uri'] = self::URI . '/post/' . $details->postId;
        $item['author'] = $details->authorText->runs[0]->text ?? null;

        // The post link is the content unless the post carries text.
        $item['content'] = $item['uri'];
        if (isset($details->contentText->runs)) {
            $text = $this->getText($details->contentText->runs);
            $this->itemTitle = $this->ellipsisTitle($text);
            $item['content'] = $text;
        }

        // getAttachments() may also set a fallback title, so the title is
        // read only after it has run.
        $item['content'] .= $this->getAttachments($details);
        $item['title'] = $this->itemTitle;

        // A missing published text yields an empty string, which strtotime()
        // rejects, so the item is simply emitted without a timestamp.
        $published = $details->publishedTimeText->runs[0]->text ?? '';
        $date = strtotime(str_replace(' (edited)', '', $published));
        if (is_int($date)) {
            // subtract an increasing multiple of 60 seconds to always preserve the original order
            $item['timestamp'] = $date - $key * 60;
        }

        $this->items[] = $item;
    }
}
/**
 * Return the community tab URL once it is known, otherwise the parent's URI.
 */
public function getURI()
{
    return empty($this->feedUrl) ? parent::getURI() : $this->feedUrl;
}
/**
 * Return the feed name built from the channel title, or the parent's name
 * while no channel title is available.
 */
public function getName()
{
    if (empty($this->feedName)) {
        return parent::getName();
    }

    return $this->feedName . ' - YouTube Community Tab';
}
/**
 * Build the community tab URL for a channel.
 *
 * @param string $value Channel ID or username placed in the URL path.
 * @param string $type  Path prefix; collectData() passes 'channel', 'c' or 'user'.
 * @return string URL of the form <URI>/<type>/<value>/community.
 */
private function buildCommunityUri($value, $type)
{
    $segments = [self::URI, $type, $value, 'community'];

    return implode('/', $segments);
}
/**
 * Extract the ytInitialData JSON embedded in the page markup.
 *
 * @param string $html Page markup searched with $this->jsonRegex.
 * @return object Decoded JSON data.
 */
private function extractJson($html)
{
    if (!preg_match($this->jsonRegex, $html, $parts)) {
        returnServerError('Failed to extract data from page');
    }

    // json_decode() signals failure with null, not false, so the previous
    // "=== false" check could never detect a decoding error. Callers read
    // the result as an object, so anything else is treated as a failure too.
    $data = json_decode($parts[1]);
    if (!is_object($data)) {
        returnServerError('Failed to decode extracted data');
    }

    return $data;
}
/**
 * Tell whether the page data lists a tab whose URL ends with "community".
 *
 * @param object $json Decoded page data.
 * @return bool True when such a tab exists, false otherwise.
 */
private function hasCommunityTab($json)
{
    $tabs = $json->contents->twoColumnBrowseResultsRenderer->tabs;

    foreach ($tabs as $tab) {
        // Entries without a tabRenderer are ignored.
        if (!isset($tab->tabRenderer)) {
            continue;
        }

        $tabUrl = $tab->tabRenderer->endpoint->commandMetadata->webCommandMetadata->url;
        if (str_ends_with($tabUrl, 'community')) {
            return true;
        }
    }

    return false;
}
/**
 * Return the post entries of the community tab.
 *
 * @param object $json Decoded page data.
 * @return array Post entries of the first tab whose URL ends with
 *               "community"; an empty array when there is no such tab or
 *               the tab carries no post list.
 */
private function getCommunityPosts($json)
{
    foreach ($json->contents->twoColumnBrowseResultsRenderer->tabs as $tab) {
        if (
            isset($tab->tabRenderer)
            && str_ends_with($tab->tabRenderer->endpoint->commandMetadata->webCommandMetadata->url, 'community')
        ) {
            // The nested list may be absent; the caller iterates the result,
            // so hand back an empty list rather than null.
            return $tab->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer->contents ?? [];
        }
    }

    // No community tab found: previously this fell through and returned null.
    return [];
}
/**
 * Assemble the HTML text of a post from its text runs.
 *
 * Each run is passed through formatUrls() together with the first link
 * target found on it (browse endpoint, then URL endpoint, then web command
 * URL), or null when the run has none. Newlines become <br> tags.
 *
 * @param iterable $runs Text runs of the post.
 * @return string Post text as HTML.
 */
private function getText($runs)
{
    $pieces = [];

    foreach ($runs as $run) {
        $target = $run->navigationEndpoint->browseEndpoint->canonicalBaseUrl
            ?? $run->navigationEndpoint->urlEndpoint->url
            ?? $run->navigationEndpoint->commandMetadata->webCommandMetadata->url
            ?? null;

        $pieces[] = $this->formatUrls($run->text, $target);
    }

    return nl2br(implode('', $pieces));
}
/**
 * Render the attachment of a post (video, image, poll or image gallery) as HTML.
 *
 * When the item has no title yet, a fallback title describing the attachment
 * is stored in $this->itemTitle. Values taken from the page data are escaped
 * before being placed into the markup.
 *
 * @param object $details Post renderer data.
 * @return string HTML for the attachment, or an empty string when there is none.
 */
private function getAttachments($details)
{
    if (!isset($details->backstageAttachment)) {
        return '';
    }

    $attachments = $details->backstageAttachment;
    $content = '';

    // Escapes a scraped value for use in HTML text or a quoted attribute.
    $escape = static function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    if (isset($attachments->videoRenderer->videoId)) {
        // Video
        if (empty($this->itemTitle)) {
            $this->itemTitle = $this->feedName . ' posted a video';
        }

        $content = '<iframe width="100%" height="410" src="https://www.youtube.com/embed/'
            . $escape($attachments->videoRenderer->videoId) . '"' . "\n"
            . 'frameborder="0" allow="encrypted-media;" allowfullscreen></iframe>';
    } elseif (isset($attachments->backstageImageRenderer)) {
        // Image
        if (empty($this->itemTitle)) {
            $this->itemTitle = $this->feedName . ' posted an image';
        }

        // The last thumbnail is used; an empty list produces no markup
        // instead of dereferencing the false returned by end().
        $thumbnails = $attachments->backstageImageRenderer->image->thumbnails ?? [];
        $lastThumb = end($thumbnails);
        if (isset($lastThumb->url)) {
            $content = '<p><img src="' . $escape($lastThumb->url) . '"></p>';
        }
    } elseif (isset($attachments->pollRenderer)) {
        // Poll
        if (empty($this->itemTitle)) {
            $this->itemTitle = $this->feedName . ' posted a poll';
        }

        $pollChoices = '';
        foreach (($attachments->pollRenderer->choices ?? []) as $choice) {
            $pollChoices .= '<li>' . $escape($choice->text->runs[0]->text ?? '') . '</li>';
        }

        // The list is emitted after the closed paragraph: the old markup
        // nested <ul> inside <p> and ended with an unclosed <p>.
        $totalVotes = $escape($attachments->pollRenderer->totalVotes->simpleText ?? '');
        $content = '<hr><p>Poll (' . $totalVotes . ')</p><ul>' . $pollChoices . '</ul>';
    } elseif (isset($attachments->postMultiImageRenderer->images)) {
        // Multiple images
        $images = $attachments->postMultiImageRenderer->images;
        if (is_array($images)) {
            if (empty($this->itemTitle)) {
                $this->itemTitle = $this->feedName . ' posted ' . count($images) . ' images';
            }

            foreach ($images as $image) {
                $thumbnails = $image->backstageImageRenderer->image->thumbnails ?? [];
                $lastThumb = end($thumbnails);
                if (isset($lastThumb->url)) {
                    $content .= '<p><img src="' . $escape($lastThumb->url) . '"></p>';
                }
            }
        }
    }

    return $content;
}
/**
 * Build an item title from post text.
 *
 * Tags are stripped. Text longer than 100 bytes is word-wrapped at that
 * width and only its first line is kept, followed by "...".
 *
 * @param string $text Post text, possibly containing HTML.
 * @return string Title text.
 */
private function ellipsisTitle($text)
{
    $maxLength = 100;
    $plain = strip_tags($text);

    if (strlen($plain) <= $maxLength) {
        return $plain;
    }

    $wrapped = wordwrap($plain, $maxLength, '<br>');
    $lines = explode('<br>', $wrapped);

    return $lines[0] . '...';
}
/**
 * Wrap a piece of post text in a link when a usable URL is available.
 *
 * Relative URLs are made absolute. A YouTube redirect URL is replaced by its
 * "q" target when the visible text (without trailing dots) is a prefix of
 * that target. When no URL is given, the text itself is tried as the URL.
 *
 * @param string      $content Visible text of the run.
 * @param string|null $url     Link target of the run, if any.
 * @return string An anchor element when the URL validates, otherwise $content unchanged.
 */
private function formatUrls($content, $url)
{
    $redirectPrefix = 'https://www.youtube.com/redirect?';
    $rawUrl = strval($url);

    if (substr($rawUrl, 0, 1) === '/') {
        // fix relative URL
        $url = 'https://www.youtube.com' . $rawUrl;
    } elseif (substr($rawUrl, 0, strlen($redirectPrefix)) === $redirectPrefix) {
        // extract actual URL from YouTube redirect
        parse_str(substr($rawUrl, strlen($redirectPrefix)), $params);
        $target = $params['q'] ?? '';
        $visible = rtrim($content, '.');

        // An empty needle would match any target (and read a missing "q"),
        // so require real visible text and a real target before replacing.
        if (is_string($target) && $target !== '' && $visible !== '' && strpos($target, $visible) === 0) {
            $url = $target;
        }
    }

    // ensure all URLs are made clickable
    $url = $url ?? $content;

    if (filter_var($url, FILTER_VALIDATE_URL)) {
        // Passing validation does not make a URL safe inside a quoted
        // attribute, so it is escaped before being written into href.
        $href = htmlspecialchars((string) $url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        return '<a href="' . $href . '" target="_blank">' . $content . '</a>';
    }

    return $content;
}
}