LCOV - code coverage report
Current view: top level - src/utilities - GoogleNewsSitemapSynthesizer.cpp (source / functions) Coverage Total Hit
Test: coverage.info.cleaned Lines: 40.7 % 327 133
Test Date: 2026-03-23 10:19:47 Functions: 57.7 % 26 15

            Line data    Source code
       1              : #include "GoogleNewsSitemapSynthesizer.h"
       2              : #include "FangLogging.h"
       3              : 
       4              : #include <algorithm>
       5              : #include <QSet>
       6              : 
       7           66 : static QString stripWww(const QString& host)
       8              : {
       9           66 :     if (host.startsWith("www.")) {
      10           56 :         return host.mid(4);
      11              :     }
      12           10 :     return host;
      13              : }
      14              : 
      15           14 : GoogleNewsSitemapSynthesizer::GoogleNewsSitemapSynthesizer(QObject* parent)
      16              :     : FangObject(parent)
      17           14 :     , isRefresh(false)
      18           14 :     , _hasError(false)
      19           14 :     , _result(nullptr)
      20           14 :     , downloader(nullptr)
      21              : {
      22           14 : }
      23              : 
      24           16 : GoogleNewsSitemapSynthesizer::~GoogleNewsSitemapSynthesizer()
      25              : {
      26           16 : }
      27              : 
      28            3 : QStringList GoogleNewsSitemapSynthesizer::newsSitemapPaths()
      29              : {
      30              :     return {
      31              :         "/news-sitemap.xml",
      32              :         "/sitemap_news.xml",
      33              :         "/news-sitemap-content.xml"
      34           12 :     };
      35            3 : }
      36              : 
      37            3 : void GoogleNewsSitemapSynthesizer::synthesize(const QUrl& siteUrl, const QString& siteTitle)
      38              : {
      39            3 :     isRefresh = false;
      40            3 :     feedTitle = siteTitle;
      41            3 :     since = QDateTime();
      42              : 
      43              :     // Build base URL for probing.
      44            3 :     siteBaseUrl.setScheme(siteUrl.scheme());
      45            3 :     siteBaseUrl.setHost(siteUrl.host());
      46              : 
      47            6 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: starting discovery for" << siteBaseUrl;
      48              : 
      49              :     // Start by fetching robots.txt to discover news sitemap URLs.
      50            3 :     fetchRobotsTxt();
      51            3 : }
      52              : 
      53            0 : void GoogleNewsSitemapSynthesizer::synthesize(const QUrl& sitemapUrl, const QString& feedTitle,
      54              :                                         const QDateTime& since)
      55              : {
      56            0 :     isRefresh = true;
      57            0 :     this->feedTitle = feedTitle;
      58            0 :     this->since = since;
      59              : 
      60              :     // For refresh, we already know the exact sitemap URL.
      61            0 :     candidateUrls.clear();
      62            0 :     candidateUrls.append(sitemapUrl);
      63              : 
      64            0 :     siteBaseUrl.setScheme(sitemapUrl.scheme());
      65            0 :     siteBaseUrl.setHost(sitemapUrl.host());
      66              : 
      67            0 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: refreshing from" << sitemapUrl
      68            0 :                         << "since" << since;
      69              : 
      70            0 :     tryNextCandidate();
      71            0 : }
      72              : 
      73            3 : void GoogleNewsSitemapSynthesizer::fetchRobotsTxt()
      74              : {
      75            3 :     QUrl robotsUrl = siteBaseUrl;
      76            3 :     robotsUrl.setPath("/robots.txt");
      77              : 
      78            6 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: fetching" << robotsUrl;
      79              : 
      80            3 :     downloader = new NetworkDownloadCore({}, this, nullptr);
      81            3 :     connect(downloader, &NetworkDownloadCore::finished,
      82            3 :             this, &GoogleNewsSitemapSynthesizer::onRobotsTxtDownloaded);
      83            3 :     connect(downloader, &NetworkDownloadCore::error,
      84            3 :             this, &GoogleNewsSitemapSynthesizer::onRobotsTxtDownloadError);
      85            3 :     downloader->download(robotsUrl);
      86            3 : }
      87              : 
      88           11 : QList<QUrl> GoogleNewsSitemapSynthesizer::parseRobotsSitemaps(const QString& robotsTxt,
      89              :                                                          const QUrl& siteBaseUrl)
      90              : {
      91           11 :     QList<QUrl> newsSitemaps;
      92           11 :     QList<QUrl> genericSitemaps;
      93           11 :     QStringList lines = robotsTxt.split('\n');
      94              : 
      95           58 :     for (const QString& line : lines) {
      96           47 :         QString trimmed = line.trimmed();
      97           47 :         if (trimmed.startsWith("Sitemap:", Qt::CaseInsensitive)) {
      98           33 :             QString urlStr = trimmed.mid(8).trimmed();
      99           33 :             QUrl url(urlStr);
     100           33 :             if (url.isValid() && stripWww(url.host()) == stripWww(siteBaseUrl.host())) {
     101           32 :                 if (url.path().contains("news", Qt::CaseInsensitive)) {
     102           12 :                     newsSitemaps.append(url);
     103              :                 } else {
     104           20 :                     genericSitemaps.append(url);
     105              :                 }
     106              :             }
     107           33 :         }
     108           47 :     }
     109              : 
     110              :     // News-specific sitemaps first, then generic ones (which may be sitemap
     111              :     // indexes that reference a news sitemap, e.g. ESPN's /sitemap.xml).
     112           11 :     newsSitemaps.append(genericSitemaps);
     113           22 :     return newsSitemaps;
     114           11 : }
     115              : 
     116            0 : void GoogleNewsSitemapSynthesizer::onRobotsTxtDownloaded(const QUrl& url, const QByteArray& data)
     117              : {
     118              :     Q_UNUSED(url);
     119              : 
     120            0 :     QString robotsTxt = QString::fromUtf8(data);
     121            0 :     QList<QUrl> robotsSitemaps = parseRobotsSitemaps(robotsTxt, siteBaseUrl);
     122              : 
     123            0 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: found" << robotsSitemaps.size()
     124            0 :                         << "news sitemaps in robots.txt";
     125              : 
     126            0 :     buildCandidateUrls(robotsSitemaps);
     127            0 :     tryNextCandidate();
     128            0 : }
     129              : 
     130            3 : void GoogleNewsSitemapSynthesizer::onRobotsTxtDownloadError(const QUrl& url, const QString& errorString)
     131              : {
     132              :     Q_UNUSED(url);
     133            6 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: robots.txt fetch failed:" << errorString
     134            3 :                         << ", trying well-known paths";
     135              : 
     136            3 :     buildCandidateUrls({});
     137            3 :     tryNextCandidate();
     138            3 : }
     139              : 
     140            3 : void GoogleNewsSitemapSynthesizer::buildCandidateUrls(const QList<QUrl>& robotsSitemaps)
     141              : {
     142            3 :     candidateUrls.clear();
     143              : 
     144              :     // Robots.txt sitemaps first (most reliable).
     145            3 :     for (const QUrl& url : robotsSitemaps) {
     146            0 :         candidateUrls.append(url);
     147              :     }
     148              : 
     149              :     // Then well-known paths as fallback.
     150           12 :     for (const QString& path : newsSitemapPaths()) {
     151            9 :         QUrl candidate = siteBaseUrl;
     152            9 :         candidate.setPath(path);
     153              :         // Avoid duplicates from robots.txt.
     154            9 :         if (!candidateUrls.contains(candidate)) {
     155            9 :             candidateUrls.append(candidate);
     156              :         }
     157           12 :     }
     158              : 
     159            6 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: probing" << candidateUrls.size()
     160            3 :                         << "candidate URLs";
     161            3 : }
     162              : 
     163           12 : void GoogleNewsSitemapSynthesizer::tryNextCandidate()
     164              : {
     165           12 :     if (candidateUrls.isEmpty()) {
     166            3 :         reportError("No feed found");
     167            3 :         return;
     168              :     }
     169              : 
     170            9 :     QUrl url = candidateUrls.takeFirst();
     171           18 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: trying" << url;
     172              : 
     173            9 :     downloader = new NetworkDownloadCore({}, this, nullptr);
     174            9 :     connect(downloader, &NetworkDownloadCore::finished,
     175            9 :             this, &GoogleNewsSitemapSynthesizer::onCandidateDownloaded);
     176            9 :     connect(downloader, &NetworkDownloadCore::error,
     177            9 :             this, &GoogleNewsSitemapSynthesizer::onCandidateDownloadError);
     178            9 :     downloader->download(url);
     179            9 : }
     180              : 
     181            0 : void GoogleNewsSitemapSynthesizer::onCandidateDownloaded(const QUrl& url, const QByteArray& data)
     182              : {
     183            0 :     QString xml = QString::fromUtf8(data);
     184            0 :     SitemapParser parser(this);
     185            0 :     SitemapParser::SitemapType type = parser.parse(xml);
     186              : 
     187            0 :     if (type == SitemapParser::Invalid) {
     188            0 :         qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: invalid XML from" << url
     189            0 :                             << ", trying next candidate";
     190            0 :         tryNextCandidate();
     191            0 :         return;
     192              :     }
     193              : 
     194            0 :     if (type == SitemapParser::SitemapIndex) {
     195              :         // Store sub-sitemaps sorted by lastmod descending (most recent first).
     196            0 :         sitemapIndexUrl = url;
     197            0 :         accumulatedEntries.clear();
     198            0 :         pendingSubSitemaps = parser.subSitemaps();
     199            0 :         if (pendingSubSitemaps.isEmpty()) {
     200            0 :             qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: empty sitemap index from" << url;
     201            0 :             tryNextCandidate();
     202            0 :             return;
     203              :         }
     204              : 
     205            0 :         std::sort(pendingSubSitemaps.begin(), pendingSubSitemaps.end(),
     206            0 :             [](const SubSitemap& a, const SubSitemap& b) {
     207            0 :                 bool aValid = a.lastmod.isValid();
     208            0 :                 bool bValid = b.lastmod.isValid();
     209            0 :                 if (aValid && bValid) {
     210            0 :                     return a.lastmod > b.lastmod;
     211              :                 }
     212            0 :                 return aValid && !bValid;
     213              :             });
     214              : 
     215            0 :         qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: sitemap index with"
     216            0 :                             << pendingSubSitemaps.size() << "sub-sitemaps";
     217              : 
     218            0 :         tryNextSubSitemap();
     219            0 :         return;
     220              :     }
     221              : 
     222              :     // UrlSet - check for Google News entries.
     223            0 :     if (!parser.hasNewsEntries()) {
     224            0 :         qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: urlset without news entries from" << url;
     225            0 :         tryNextCandidate();
     226            0 :         return;
     227              :     }
     228              : 
     229            0 :     processParsedEntries(parser.entries(), url);
     230            0 : }
     231              : 
     232            9 : void GoogleNewsSitemapSynthesizer::onCandidateDownloadError(const QUrl& url, const QString& errorString)
     233              : {
     234           18 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: download error for" << url
     235            9 :                         << ":" << errorString;
     236            9 :     tryNextCandidate();
     237            9 : }
     238              : 
     239            0 : void GoogleNewsSitemapSynthesizer::tryNextSubSitemap()
     240              : {
     241            0 :     if (pendingSubSitemaps.isEmpty()) {
     242            0 :         if (!accumulatedEntries.isEmpty()) {
     243              :             // Deduplicate repetitive wire content, then process.
     244            0 :             QList<SitemapEntry> deduped = deduplicateRepetitiveTitles(accumulatedEntries);
     245            0 :             qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: accumulated"
     246            0 :                                 << accumulatedEntries.size() << "entries from sub-sitemaps,"
     247            0 :                                 << deduped.size() << "after dedup";
     248            0 :             processParsedEntries(deduped, sitemapIndexUrl);
     249            0 :             return;
     250            0 :         }
     251              :         // None of the sub-sitemaps had news entries.
     252            0 :         qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: no sub-sitemaps with news entries";
     253            0 :         tryNextCandidate();
     254            0 :         return;
     255              :     }
     256              : 
     257            0 :     SubSitemap sub = pendingSubSitemaps.takeFirst();
     258            0 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: trying sub-sitemap" << sub.url;
     259              : 
     260            0 :     downloader = new NetworkDownloadCore({}, this, nullptr);
     261            0 :     connect(downloader, &NetworkDownloadCore::finished,
     262            0 :             this, &GoogleNewsSitemapSynthesizer::onSubSitemapDownloaded);
     263            0 :     connect(downloader, &NetworkDownloadCore::error,
     264            0 :             this, &GoogleNewsSitemapSynthesizer::onSubSitemapDownloadError);
     265            0 :     downloader->download(sub.url);
     266            0 : }
     267              : 
     268            0 : void GoogleNewsSitemapSynthesizer::onSubSitemapDownloaded(const QUrl& url, const QByteArray& data)
     269              : {
     270            0 :     QString xml = QString::fromUtf8(data);
     271            0 :     SitemapParser parser(this);
     272            0 :     SitemapParser::SitemapType type = parser.parse(xml);
     273              : 
     274            0 :     if (type == SitemapParser::UrlSet && parser.hasNewsEntries()) {
     275            0 :         qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: sub-sitemap" << url
     276            0 :                             << "has" << parser.entries().size() << "news entries, accumulating";
     277            0 :         accumulatedEntries.append(parser.entries());
     278              :     } else {
     279            0 :         qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: sub-sitemap" << url
     280            0 :                             << "has no news entries, skipping";
     281              :     }
     282              : 
     283            0 :     tryNextSubSitemap();
     284            0 : }
     285              : 
     286            0 : void GoogleNewsSitemapSynthesizer::onSubSitemapDownloadError(const QUrl& url, const QString& errorString)
     287              : {
     288            0 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: sub-sitemap download error for" << url
     289            0 :                         << ":" << errorString;
     290            0 :     tryNextSubSitemap();
     291            0 : }
     292              : 
     293            0 : QString GoogleNewsSitemapSynthesizer::normalizeLanguage(const QString& lang)
     294              : {
     295              :     // Normalize ISO 639-3 codes to ISO 639-1. Sites use both:
     296              :     // AP News uses "eng"/"spa", BBC uses "en"/"bn"/"hi".
     297              :     static const QMap<QString, QString> iso639_3to1 = {
     298              :         {"eng", "en"}, {"spa", "es"}, {"fra", "fr"}, {"deu", "de"},
     299              :         {"por", "pt"}, {"ita", "it"}, {"jpn", "ja"}, {"zho", "zh"},
     300              :         {"kor", "ko"}, {"ara", "ar"}, {"hin", "hi"}, {"rus", "ru"}
     301            0 :     };
     302              : 
     303            0 :     QString normalized = lang.toLower().section('-', 0, 0);
     304            0 :     if (iso639_3to1.contains(normalized)) {
     305            0 :         return iso639_3to1.value(normalized);
     306              :     }
     307            0 :     return normalized;
     308            0 : }
     309              : 
     310            0 : void GoogleNewsSitemapSynthesizer::processParsedEntries(const QList<SitemapEntry>& entries,
     311              :                                                    const QUrl& sourceUrl)
     312              : {
     313            0 :     feedSourceUrl = sourceUrl;
     314              : 
     315              :     // Filter to only entries with news:title.
     316            0 :     QList<SitemapEntry> newsEntries;
     317            0 :     for (const SitemapEntry& entry : entries) {
     318            0 :         if (!entry.newsTitle.isEmpty()) {
     319            0 :             newsEntries.append(entry);
     320              :         }
     321              :     }
     322              : 
     323            0 :     if (newsEntries.isEmpty()) {
     324            0 :         reportError("No feed found");
     325            0 :         return;
     326              :     }
     327              : 
     328              :     // Use the publication name as the feed title if we don't have a better one.
     329            0 :     if (!newsEntries.first().publicationName.isEmpty()) {
     330            0 :         QString pubName = newsEntries.first().publicationName;
     331              :         // Only override generic/empty titles (host names, etc.)
     332            0 :         if (feedTitle.isEmpty() || feedTitle == siteBaseUrl.host()) {
     333            0 :             feedTitle = pubName;
     334              :         }
     335            0 :     }
     336              : 
     337              :     // Filter by language: find the majority language and keep only matching entries.
     338            0 :     QMap<QString, int> langCounts;
     339            0 :     for (const SitemapEntry& entry : newsEntries) {
     340            0 :         if (!entry.language.isEmpty()) {
     341            0 :             langCounts[normalizeLanguage(entry.language)]++;
     342              :         }
     343              :     }
     344              : 
     345            0 :     if (!langCounts.isEmpty()) {
     346            0 :         QString majorityLang;
     347            0 :         int maxCount = 0;
     348            0 :         for (auto it = langCounts.cbegin(); it != langCounts.cend(); ++it) {
     349            0 :             if (it.value() > maxCount) {
     350            0 :                 maxCount = it.value();
     351            0 :                 majorityLang = it.key();
     352              :             }
     353              :         }
     354              : 
     355            0 :         QList<SitemapEntry> filtered;
     356            0 :         for (const SitemapEntry& entry : newsEntries) {
     357            0 :             if (entry.language.isEmpty()
     358            0 :                 || normalizeLanguage(entry.language) == majorityLang) {
     359            0 :                 filtered.append(entry);
     360              :             }
     361              :         }
     362            0 :         newsEntries = filtered;
     363              : 
     364            0 :         qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: filtered to language"
     365            0 :                             << majorityLang << "(" << newsEntries.size() << "entries)";
     366            0 :     }
     367              : 
     368              :     // Sort by publication date descending.
     369            0 :     std::sort(newsEntries.begin(), newsEntries.end(),
     370            0 :         [](const SitemapEntry& a, const SitemapEntry& b) {
     371              :             // Prefer publicationDate, fall back to lastmod.
     372            0 :             QDateTime dateA = a.publicationDate.isValid() ? a.publicationDate : a.lastmod;
     373            0 :             QDateTime dateB = b.publicationDate.isValid() ? b.publicationDate : b.lastmod;
     374            0 :             bool aValid = dateA.isValid();
     375            0 :             bool bValid = dateB.isValid();
     376            0 :             if (aValid && bValid) {
     377            0 :                 return dateA > dateB;
     378              :             }
     379            0 :             return aValid && !bValid;
     380            0 :         });
     381              : 
     382              :     // Filter by since date for refresh.
     383            0 :     if (isRefresh && since.isValid()) {
     384            0 :         QList<SitemapEntry> recent;
     385            0 :         for (const SitemapEntry& entry : newsEntries) {
     386            0 :             QDateTime date = entry.publicationDate.isValid()
     387            0 :                 ? entry.publicationDate : entry.lastmod;
     388            0 :             if (date.isValid() && date > since) {
     389            0 :                 recent.append(entry);
     390              :             }
     391            0 :         }
     392              : 
     393            0 :         if (recent.isEmpty()) {
     394              :             // No new entries since last refresh - emit empty feed.
     395            0 :             _result = new RawFeed(this);
     396            0 :             _result->feedType = RawFeed::GoogleNewsSitemap;
     397            0 :             _result->title = feedTitle;
     398            0 :             _result->url = feedSourceUrl;
     399            0 :             emit done();
     400            0 :             return;
     401              :         }
     402              : 
     403            0 :         newsEntries = recent;
     404            0 :     }
     405              : 
     406              :     // Limit to MAX_ENTRIES.
     407            0 :     if (newsEntries.size() > MAX_ENTRIES) {
     408            0 :         newsEntries = newsEntries.mid(0, MAX_ENTRIES);
     409              :     }
     410              : 
     411            0 :     feedEntries = newsEntries;
     412              : 
     413            0 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: selected" << feedEntries.size()
     414            0 :                         << "entries from" << feedSourceUrl;
     415              : 
     416            0 :     buildRawFeed();
     417            0 :     emit done();
     418            0 : }
     419              : 
     420            0 : void GoogleNewsSitemapSynthesizer::buildRawFeed()
     421              : {
     422            0 :     _result = new RawFeed(this);
     423            0 :     _result->feedType = RawFeed::GoogleNewsSitemap;
     424            0 :     _result->title = feedTitle;
     425            0 :     _result->url = feedSourceUrl;
     426            0 :     _result->siteURL = QUrl(siteBaseUrl.scheme() + "://" + siteBaseUrl.host());
     427              : 
     428            0 :     for (const SitemapEntry& entry : feedEntries) {
     429            0 :         auto* item = new RawNews(_result);
     430            0 :         item->guid = entry.url.toString();
     431            0 :         item->title = entry.newsTitle;
     432            0 :         item->url = entry.url;
     433            0 :         item->author = QString("");
     434            0 :         item->timestamp = entry.publicationDate.isValid()
     435            0 :             ? entry.publicationDate
     436            0 :             : (entry.lastmod.isValid() ? entry.lastmod : QDateTime::currentDateTime());
     437              : 
     438              :         // Embed the sitemap thumbnail image in the content so it flows
     439              :         // through the normal HTML sanitizer and image pipeline.
     440            0 :         if (entry.imageUrl.isValid()) {
     441            0 :             item->content = "<img src=\"" + entry.imageUrl.toString() + "\"/>";
     442              :         } else {
     443            0 :             item->content = QString("");
     444              :         }
     445            0 :         item->description = QString("");
     446              : 
     447            0 :         _result->items.append(item);
     448              :     }
     449              : 
     450            0 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer: built feed with"
     451            0 :                         << _result->items.size() << "items";
     452            0 : }
     453              : 
     454           12 : void GoogleNewsSitemapSynthesizer::setResultState(RawFeed* result, bool hasError,
     455              :                                                    const QString& errorString)
     456              : {
     457           12 :     _result = result;
     458           12 :     _hasError = hasError;
     459           12 :     _errorString = errorString;
     460           12 : }
     461              : 
     462            4 : QList<SitemapEntry> GoogleNewsSitemapSynthesizer::deduplicateRepetitiveTitles(
     463              :     const QList<SitemapEntry>& entries, int prefixWordCount, int repetitionThreshold)
     464              : {
     465              :     // Group entries by their first N words (lowercased).
     466            4 :     QMap<QString, QList<int>> prefixGroups;
     467           37 :     for (int i = 0; i < entries.size(); ++i) {
     468           33 :         QStringList words = entries[i].newsTitle.toLower().split(' ', Qt::SkipEmptyParts);
     469           33 :         QString key;
     470           33 :         if (words.size() >= prefixWordCount) {
     471           33 :             key = QStringList(words.mid(0, prefixWordCount)).join(' ');
     472              :         } else {
     473            0 :             key = words.join(' ');
     474              :         }
     475           33 :         prefixGroups[key].append(i);
     476           33 :     }
     477              : 
     478              :     // Build a set of indices to exclude (all but the most recent in large groups).
     479            4 :     QSet<int> excluded;
     480           22 :     for (auto it = prefixGroups.cbegin(); it != prefixGroups.cend(); ++it) {
     481           18 :         const QList<int>& indices = it.value();
     482           18 :         if (indices.size() <= repetitionThreshold) {
     483           16 :             continue;
     484              :         }
     485              : 
     486              :         // Find the most recent entry in this group.
     487            2 :         int bestIdx = indices.first();
     488            2 :         QDateTime bestDate;
     489           17 :         for (int idx : indices) {
     490           15 :             QDateTime date = entries[idx].publicationDate.isValid()
     491           15 :                 ? entries[idx].publicationDate : entries[idx].lastmod;
     492           15 :             if (!bestDate.isValid() || (date.isValid() && date > bestDate)) {
     493            3 :                 bestDate = date;
     494            3 :                 bestIdx = idx;
     495              :             }
     496           15 :         }
     497              : 
     498              :         // Exclude all but the best.
     499           17 :         for (int idx : indices) {
     500           15 :             if (idx != bestIdx) {
     501           13 :                 excluded.insert(idx);
     502              :             }
     503              :         }
     504            2 :     }
     505              : 
     506              :     // Build filtered list preserving original order.
     507            4 :     QList<SitemapEntry> result;
     508           37 :     for (int i = 0; i < entries.size(); ++i) {
     509           33 :         if (!excluded.contains(i)) {
     510           20 :             result.append(entries[i]);
     511              :         }
     512              :     }
     513            8 :     return result;
     514            4 : }
     515              : 
     516            3 : void GoogleNewsSitemapSynthesizer::reportError(const QString& error)
     517              : {
     518            3 :     _hasError = true;
     519            3 :     _errorString = error;
     520            6 :     qCDebug(logUtility) << "GoogleNewsSitemapSynthesizer error:" << error;
     521            3 :     emit done();
     522            3 : }
        

Generated by: LCOV version 2.0-1