Line data Source code
1 : #ifndef WEBPAGEGRABBER_H
2 : #define WEBPAGEGRABBER_H
3 :
4 : #include <QObject>
5 : #include <QString>
6 : #include <QUrl>
7 : #include <QTimer>
8 : #include "../FangObject.h"
9 : #include "../network/NetworkDownloadCore.h"
10 :
11 : #define DEFAULT_HANDLE_META_REFRESH true // Default for the constructor's handleMetaRefresh argument.
12 : #define DEFAULT_TIMEOUT_MS 5000 // Default for the constructor's timeoutMS argument (milliseconds).
13 : #define MAX_REDIRECTS 10 // NOTE(review): not referenced in this header; presumably caps redirectAttempts -- confirm in the .cpp.
14 :
15 : /*!
16 : \brief Loads a web page at a given URL and signals with the XHTML document when done.
17 :
18 : Note that this class is not reentrant.
19 : */
20 : class WebPageGrabber : public FangObject
21 : {
22 : Q_OBJECT
23 : public:
24 : /*!
25 : \brief WebPageGrabber creates an XHTML document from either a string or a URL.
26 : \param handleMetaRefresh If true, handles refreshes from within HTML documents rather than
27 : just HTTP communication.
28 : \param timeoutMS Timeout after last download activity in milliseconds
29 : \param parent Parent QObject (defaults to nullptr)
30 : \param networkManager Optional network manager for dependency injection (for testing)
31 : */
32 : explicit WebPageGrabber(bool handleMetaRefresh = DEFAULT_HANDLE_META_REFRESH,
33 : int timeoutMS = DEFAULT_TIMEOUT_MS,
34 : QObject *parent = nullptr,
35 : QNetworkAccessManager* networkManager = nullptr);
36 :
37 : /*!
38 : * \brief Just like the above but with all the defaults specified *except* for parent.
39 : */
40 : explicit WebPageGrabber(QObject *parent);
41 :
42 :
43 : signals:
44 : // If you requested a URL, ready() will be emitted when it's ready!
45 : // If document is null, an error happened. :(
46 : void ready(WebPageGrabber* grabber, QString* document);
47 :
48 : public slots:
49 : // Fetches the webpage and emits ready() with the XHTML document.
50 : // Signals with null on an error.
51 : virtual void load(const QUrl &url);
52 :
53 : // Loads the HTML string into a Tidy'd XHTML document and returns it (no signal is emitted.)
54 : // Returns null on an error.
55 : QString* load(const QString& htmlString);
56 :
57 : // Returns a pointer to the previously loaded document, or nullptr if there was an error.
58 3 : inline QString* getDocument() { return error ? nullptr : &document; }
59 :
60 0 : inline QUrl getOriginalURL() const { return originalUrl; } // Returns a copy of originalUrl.
61 :
62 0 : inline bool isDone() const { return done; } // Returns the current value of the done flag.
63 :
64 : private slots:
65 : // Internal load methods.
66 : void loadInternal(const QUrl &url);
67 : QString* loadInternal(const QString& htmlString, bool handleRefresh);
68 :
69 : // Uh oh, an error!
70 : void onDownloadError(const QUrl& url, const QString& errorString);
71 :
72 : // We got some HTTP content!
73 : void onDownloadFinished(const QUrl& url, const QByteArray& data);
74 :
75 : // Searches the XHTML'd document for a redirect URL.
76 : // Returns the redirect URL, or the empty string.
77 : QString searchForRedirect(const QString& document);
78 :
79 : // Handles the boilerplate involved with emitting the ready() signal.
80 : void emitReadySignal(QString* document);
81 :
82 : private:
83 : void init();
84 :
85 : NetworkDownloadCore* core; // NOTE(review): presumably performs the HTTP download; ownership is not visible in this header.
86 : QString document; // Backing storage for the pointer returned by getDocument().
87 : bool handleMetaRefresh; // Presumably stores the constructor's handleMetaRefresh flag -- confirm in the .cpp.
88 : int redirectAttempts; // NOTE(review): presumably counted against MAX_REDIRECTS -- confirm in the .cpp.
89 : bool error; // When true, getDocument() returns nullptr.
90 : QUrl originalUrl; // Returned by getOriginalURL().
91 : bool done; // Returned by isDone().
92 : };
93 :
94 : #endif // WEBPAGEGRABBER_H
|