RSS-Bridge_rss-bridge/bridges/FSecureBlogBridge.php

127 lines
4.0 KiB
PHP

<?php
class FSecureBlogBridge extends BridgeAbstract
{
const NAME = 'F-Secure Blog';
const URI = 'https://blog.f-secure.com';
const DESCRIPTION = 'F-Secure Blog';
const MAINTAINER = 'simon816';
const PARAMETERS = [
'' => [
'categories' => [
'name' => 'Blog categories',
'exampleValue' => 'home-security',
],
'language' => [
'name' => 'Language',
'required' => true,
'defaultValue' => 'en',
],
'oldest_date' => [
'name' => 'Oldest article date',
'exampleValue' => '-6 months',
],
]
];
public function getURI()
{
$lang = $this->getInput('language') or 'en';
if ($lang === 'en') {
return self::URI;
}
return self::URI . "/$lang";
}
public function collectData()
{
$this->items = [];
$this->seen = [];
$this->oldest = strtotime($this->getInput('oldest_date')) ?: 0;
$categories = $this->getInput('categories');
if (!empty($categories)) {
foreach (explode(',', $categories) as $cat) {
if (!empty($cat)) {
$this->collectCategory($cat);
}
}
return;
}
$html = getSimpleHTMLDOMCached($this->getURI() . '/');
foreach ($html->find('ul.c-header-menu-desktop__list li a') as $link) {
$url = parse_url($link->href);
if (($pos = strpos($url['path'], '/category/')) !== false) {
$cat = substr($url['path'], $pos + strlen('/category/'), -1);
$this->collectCategory($cat);
}
}
}
private function collectCategory($category)
{
$url = $this->getURI() . "/category/$category/";
while ($url) {
//Limit total amount of requests
if (count($this->items) >= 20) {
break;
}
$url = $this->collectListing($url);
}
}
// n.b. this relies on articles to be ordered by date so the cutoff works
private function collectListing($url)
{
$html = getSimpleHTMLDOMCached($url, 60 * 60);
$items = $html->find('section.b-blog .l-blog__content__listing div.c-listing-item');
$catName = trim($html->find('section.b-blog .c-blog-header__title', 0)->plaintext);
foreach ($items as $item) {
$url = $item->getAttribute('data-url');
if (!$this->collectArticle($url)) {
return null; // Too old, stop collecting
}
}
// Point's to 404 for non-english blog
// $next = $html->find('link[rel=next]', 0);
$next = $html->find('ul.page-numbers a.next', 0);
return $next ? $next->href : null;
}
// Returns a boolean whether to continue collecting articles
// i.e. date is after oldest cutoff
private function collectArticle($url)
{
if (array_key_exists($url, $this->seen)) {
return true;
}
$html = getSimpleHTMLDOMCached($url);
$rssItem = [ 'uri' => $url, 'uid' => $url ];
$rssItem['title'] = $html->find('meta[property=og:title]', 0)->content;
$dt = $html->find('meta[property=article:published_time]', 0)->content;
// Exit if too old
if (strtotime($dt) < $this->oldest) {
return false;
}
$rssItem['timestamp'] = $dt;
$img = $html->find('meta[property=og:image]', 0);
$rssItem['enclosures'] = $img ? [$img->content] : [];
$rssItem['author'] = trim($html->find('.c-blog-author__text a', 0)->plaintext);
$rssItem['categories'] = array_map(function ($link) {
return trim($link->plaintext);
}, $html->find('.b-single-header__categories .c-category-list a'));
$rssItem['content'] = trim($html->find('article', 0)->innertext);
$this->items[] = $rssItem;
$this->seen[$url] = 1;
return true;
}
}