LCOV - code coverage report
Current view: top level - lib/FangFeedDiscovery - FeedDiscovery.h (source / functions) Coverage Total Hit
Test: coverage.info.cleaned Lines: 88.9 % 9 8
Test Date: 2026-04-19 00:35:54 Functions: 100.0 % 6 6

            Line data    Source code
       1              : #ifndef FEEDDISCOVERY_H
       2              : #define FEEDDISCOVERY_H
       3              : 
       4              : #include <memory>
       5              : 
       6              : #include <QList>
       7              : #include <QObject>
       8              : #include <QString>
       9              : #include <QStringList>
      10              : #include <QTimer>
      11              : #include <QUrl>
      12              : 
      13              : class QSimpleStateMachine;
      14              : 
      15              : #include "RawFeed.h"
      16              : 
      17              : class FeedSource;
      18              : class BatchFeedFetcher;
      19              : class WebPageGrabber;
      20              : class NewsSitemapSynthesizer;
      21              : 
      22              : /*!
      23              :     \brief Attempts to match a user-submitted URL like "bob.com" to an actual news feed.
      24              :     This is done by massaging the URL so that Qt can fetch a document.  If that document is
      25              :     a web page, we search for a link to the RSS or Atom feed in the HTML.  Then we check
      26              :     the linked document to see if it's an RSS or Atom feed.
      27              : 
      28              :     HTTP and HTML redirects are handled.
      29              : 
      30              :     State machine diagram:
      31              : 
      32              :     CHECK_FEED
      33              :         |  \- FEED_ERROR
      34              :         |
      35              :      TRY_FEED
      36              :         |  \- FEED_FOUND
      37              :         |
      38              :     WEB_GRABBER
      39              :         |  \- VALIDATE_FEEDS -> FEED_FOUND
      40              :         |
      41              :     TRY_COMMON_PATHS
      42              :         |  \- FEED_FOUND
      43              :         |
      44              :     TRY_NEWS_SITEMAP
      45              :         |  \- FEED_FOUND
      46              :         |  \- FEED_ERROR
      47              :  */
      48              : class FeedDiscovery : public QObject
      49              : {
      50              :     Q_OBJECT
      51              :     
      52              : private:
      53              : 
      54              :     enum FeedDiscoveryState {
      55              :         CHECK_FEED,
      56              :         TRY_FEED,
      57              :         WEB_GRABBER,
      58              :         VALIDATE_FEEDS,      // Bulk feed validation
      59              :         TRY_COMMON_PATHS,    // Probe well-known RSS paths (/feed, /rss, etc.)
      60              :         TRY_NEWS_SITEMAP, // News sitemap-based feed synthesis
      61              :         FEED_FOUND,
      62              :         FEED_ERROR
      63              :     };
      64              :     
      65              : public:
      66              :     enum class Error {
      67              :         None,
      68              :         InvalidURL,
      69              :         NoFeedsFound,
      70              :         NetworkError,
      71              :         Timeout
      72              :     };
      73              : 
      74              :     /*!
      75              :         \brief Structure to hold a discovered feed with metadata
      76              :      */
      77              :     struct DiscoveredFeed {
      78              :         QUrl url;                          // Feed URL
      79              :         QString title;                     // Feed title (from parsed feed or URL)
      80              :         std::shared_ptr<RawFeed> feed;     // Parsed feed (nullptr if not yet parsed)
      81              :         bool validated = false;            // Has this feed been successfully parsed?
      82              :     };
      83              : 
      84              :     explicit FeedDiscovery(QObject *parent = nullptr,
      85              :                           FeedSource* firstParser = nullptr,
      86              :                           WebPageGrabber* pageGrabber = nullptr,
      87              :                           BatchFeedFetcher* feedParser = nullptr,
      88              :                           NewsSitemapSynthesizer* sitemapSynthesizer = nullptr);
      89              :     virtual ~FeedDiscovery();
      90              : 
      91              :     /*!
      92              :         \return After done(), this returns the error code (None if successful).
      93              :      */
      94           39 :     Error error() { return _error; }
      95              : 
      96              :     /*!
      97              :         \return After done(), this returns the error string, if there was an error.
      98              :      */
      99           14 :     QString errorString() { return _errorString; }
     100              : 
     101              :     /*!
     102              :         \return The best feed URL, or an empty URL if there was an error.
     103              :      */
     104           24 :     QUrl feedURL() { return _error != Error::None ? QUrl("") : _feedURL; }
     105              : 
     106              :     /*!
     107              :         \return The best raw feed, or nullptr if there was an error.
     108              :      */
     109            9 :     std::shared_ptr<RawFeed> feedResult() {
     110            9 :         if (_error != Error::None || _discoveredFeeds.isEmpty()) {
     111            0 :             return nullptr;
     112              :         }
     113            9 :         return _discoveredFeeds.first().feed;
     114              :     }
     115              : 
     116              :     /*!
     117              :         \return List of all discovered feeds (may be empty if error or single-feed mode)
     118              :      */
     119            7 :     QList<DiscoveredFeed> discoveredFeeds() const { return _discoveredFeeds; }
     120              : 
     121              :     /*!
     122              :         \return Number of feeds discovered (0 = error or single-feed mode, 1+ = multi-feed)
     123              :      */
     124            9 :     int feedCount() const { return _discoveredFeeds.count(); }
     125              :     
     126              : signals:
     127              : 
     128              :     /*!
     129              :         \brief Completion signal. Check error(); if it is Error::None, get feedURL().
     130              :         \param feedDiscovery The discovery object reporting completion (presumably the sender).
     131              :      */
     132              :     void done(FeedDiscovery* feedDiscovery);
     133              :     
     134              : public slots:
     135              : 
     136              :     /*!
     137              :         \brief Call this with the URL to check to start discovery, then wait for done().
     138              :         \param sURL User-submitted URL string to check (e.g. "bob.com").
     139              :      */
     140              :     virtual void checkFeed(QString sURL);
     141              : 
     142              :     /*!
     143              :         \brief Try to find links to RSS and Atom feed(s) in an XHTML document, if available.
     144              :         \param document The XHTML document text to search.
     145              :      */
     146              :     QList<QString> parseFeedsFromXHTML(const QString& document);
     147              :     
     148              : private slots:
     149              : 
     150              :     // State change slots:
     151              :     void onTryFeed();
     152              :     void onFeedFound();
     153              :     void onWebGrabber();
     154              :     void onValidateFeeds();    // Bulk feed validation
     155              :     void onTryCommonPaths();   // Probe well-known RSS paths
     156              :     void onTryNewsSitemap(); // News sitemap-based feed synthesis
     157              :     void onError();
     158              : 
     159              :     // Parser/BulkParser slots:
     160              :     void onFirstParseDone();
     161              :     void onFeedParserReady();
     162              : 
     163              :     // WebPageGrabber slots:
     164              :     void onPageGrabberReady(WebPageGrabber* grabber, QString* document);
     165              : 
     166              :     // NewsSitemapSynthesizer slots:
     167              :     void onNewsSitemapDone();
     168              : 
     169              :     // Timeout slot:
     170              :     void onTimeout();
     171              : 
     172              : protected:
     173              :     FeedSource* parserFirstTry;
     174              :     WebPageGrabber* pageGrabber;            // For fetching HTML pages
     175              :     BatchFeedFetcher* feedParser;            // For bulk feed parsing
     176              : 
     177              : private:
     178              :     // Common RSS/Atom paths to probe when no feeds are found in HTML.
     179              :     static QStringList commonFeedPaths();
     180              : 
     181              :     // Sets the error code, error string, and triggers the FEED_ERROR state.
     182              :     void reportError(Error error, const QString& errorString = {});
     183              : 
     184              :     QSimpleStateMachine* machine;
     185              : 
     186              :     QUrl _feedURL;
     187              :     Error _error;
     188              :     QString _errorString;
     189              : 
     190              :     // Multi-feed discovery state
     191              :     QList<DiscoveredFeed> _discoveredFeeds;  // All discovered feeds
     192              :     QList<QUrl> _sortedFeedURLs;             // Feed URLs to validate (sorted by path length)
     193              : 
     194              :     // Common path probing state
     195              :     bool _probingCommonPaths;
     196              : 
     197              :     // Sitemap state
     198              :     NewsSitemapSynthesizer* newsSitemapSynthesizer;
     199              :     QString _pageXHTML;  // Stored XHTML from web grabber for sitemap fallback
     200              : 
     201              :     // Overall discovery timeout
     202              :     QTimer timeoutTimer;
     203              : };
     204              : 
     205              : #endif // FEEDDISCOVERY_H
        

Generated by: LCOV version 2.0-1