LCOV - code coverage report
Current view: top level - lib/FangFeedDiscovery - WebPageGrabber.h (source / functions) Coverage Total Hit
Test: coverage.info.cleaned Lines: 33.3 % 3 1
Test Date: 2026-04-19 00:35:54 Functions: 33.3 % 3 1

            Line data    Source code
       1              : #ifndef WEBPAGEGRABBER_H
       2              : #define WEBPAGEGRABBER_H
       3              : 
       4              : #include <QObject>
       5              : #include <QString>
       6              : #include <QUrl>
       7              : #include <QTimer>
       8              : class QWebDownload;
       9              : class QNetworkAccessManager;
      10              : 
      11              : /*!
      12              :     \brief Loads a web page at a given URL and signals with the XHTML document when done.
      13              : 
      14              :     Note that this class is not reentrant.
      15              :  */
      16              : class WebPageGrabber : public QObject
      17              : {
      18              :     Q_OBJECT
      19              : public:
      20              :     /*!
      21              :         \brief WebPageGrabber creates an XHTML document from either a string or a URL.
      22              :         \param handleMetaRefresh If true, handles refreshes from within HTML documents rather than
      23              :                                  just HTTP communication.
      24              :         \param timeoutMS         Timeout after last download activity in milliseconds
      25              :         \param parent            Optional parent QObject (defaults to nullptr)
      26              :         \param networkManager    Optional network manager for dependency injection (for testing)
      27              :      */
      28              :     explicit WebPageGrabber(bool handleMetaRefresh = defaultHandleMetaRefresh,
      29              :                             int timeoutMS = defaultTimeoutMs,
      30              :                             QObject *parent = nullptr,
      31              :                             QNetworkAccessManager* networkManager = nullptr);
      32              : 
      33              :     /*!
      34              :      * \brief Just like the above but with all the defaults specified *except* for parent.
      35              :      */
      36              :     explicit WebPageGrabber(QObject *parent);
      37              : 
      38              :     /*!
      39              :         \brief Convert raw HTML bytes to XHTML via TidyLib.
      40              :         \return The XHTML string, or empty string on failure.
      41              :      */
      42              :     static QString htmlToXhtml(const QByteArray& html);
      43              : 
      44              : signals:
      45              :     // If you requested a URL, ready() will be emitted when it's ready!
      46              :     // If document is null, an error happened. :(
      47              :     void ready(WebPageGrabber* grabber, QString* document);
      48              : 
      49              : public slots:
      50              :     // Fetches the webpage and emits ready() with the XHTML document.
      51              :     // Signals with null on an error.
      52              :     virtual void load(const QUrl &url);
      53              : 
      54              :     // Loads the HTML string into a Tidy'd XHTML document and returns it (no signal is emitted).
      55              :     // Returns null on an error.
      56              :     QString* load(const QString& htmlString);
      57              : 
      58              :     // Returns a pointer to the previously loaded document, or nullptr if the error flag is set.
      59            3 :     inline QString* getDocument() { return error ? nullptr : &document; }
      60              : 
      61            0 :     inline QUrl getOriginalURL() const { return originalUrl; } // Returns a copy of originalUrl.
      62              : 
      63            0 :     inline bool isDone() const { return done; } // Returns the current value of the done flag.
      64              :     
      65              : private slots:
      66              :     // Internal load methods.
      67              :     void loadInternal(const QUrl &url);
      68              :     QString* loadInternal(const QString& htmlString, bool handleRefresh);
      69              : 
      70              :     // Uh oh, an error!
      71              :     void onDownloadError(const QUrl& url, const QString& errorString);
      72              : 
      73              :     // We got some HTTP content!
      74              :     void onDownloadFinished(const QUrl& url, const QByteArray& data);
      75              : 
      76              :     // Searches the XHTML'd document for a redirect URL.
      77              :     // Returns the redirect URL, or the empty string.
      78              :     QString searchForRedirect(const QString& document);
      79              : 
      80              :     // Handles the boilerplate involved with emitting the ready() signal.
      81              :     void emitReadySignal(QString* document);
      82              :     
      83              : private:
      84              :     static constexpr bool defaultHandleMetaRefresh = true;
      85              :     static constexpr int defaultTimeoutMs = 5000;
      86              :     static constexpr int maxRedirects = 10;
      87              : 
      88              :     void init();
      89              : 
      90              :     QWebDownload* core;
      91              :     QString document;
      92              :     bool handleMetaRefresh;
      93              :     int redirectAttempts;
      94              :     bool error;
      95              :     QUrl originalUrl;
      96              :     bool done;
      97              : };
      98              : 
      99              : #endif // WEBPAGEGRABBER_H
        

Generated by: LCOV version 2.0-1