Line data Source code
1 : #ifndef WEBPAGEGRABBER_H
2 : #define WEBPAGEGRABBER_H
3 :
4 : #include <QObject>
5 : #include <QString>
6 : #include <QUrl>
7 : #include <QTimer>
8 : #include "../FangObject.h"
9 : #include "SimpleHTTPDownloader.h"
10 :
11 : #define DEFAULT_HANDLE_META_REFRESH true
12 : #define DEFAULT_TIMEOUT_MS 5000
13 :
14 : /*!
15 : * @brief Loads a web page at a given URL and signals with the XHTML document when done.
16 : *
17 : * Note that this class is not reentrant.
18 : */
19 : class WebPageGrabber : public FangObject
20 : {
21 : Q_OBJECT
22 : public:
23 : /**
24 : * @brief WebPageGrabber creates an XHTML document from either a string or a URL.
25 : * @param handleMetaRefresh If true, handles refreshes from within HTML documents rather than
26 : * just HTTP communication. Defaults to DEFAULT_HANDLE_META_REFRESH.
27 : * @param timeoutMS Timeout after last download activity in milliseconds. Defaults to DEFAULT_TIMEOUT_MS.
28 : * @param parent Optional parent object (defaults to nullptr).
29 : * @param networkManager Optional network manager for dependency injection (for testing)
30 : */
31 : explicit WebPageGrabber(bool handleMetaRefresh = DEFAULT_HANDLE_META_REFRESH,
32 : int timeoutMS = DEFAULT_TIMEOUT_MS,
33 : QObject *parent = nullptr,
34 : QNetworkAccessManager* networkManager = nullptr);
35 :
36 : /*!
37 : * \brief Same as the constructor above, with every argument left at its default *except* for parent.
38 : */
39 : explicit WebPageGrabber(QObject *parent);
40 :
41 :
42 : signals:
43 : // Emitted when a URL requested through load(const QUrl&) is ready.
44 : // If document is null, an error happened.
45 : void ready(WebPageGrabber* grabber, QString* document);
46 :
47 : public slots:
48 : // Fetches the web page at the given URL and emits ready() with the XHTML document.
49 : // Signals with null on an error.
50 : virtual void load(const QUrl &url);
51 :
52 : // Loads the HTML string into a Tidy'd XHTML document and returns it (no signal is emitted).
53 : // Returns null on an error.
54 : QString* load(const QString& htmlString);
55 :
56 : // Returns a pointer to this object's internal document member, or nullptr if the error flag is set.
57 3 : inline QString* getDocument() { return error ? nullptr : &document; }
58 : // Returns a copy of the stored originalUrl member.
59 0 : inline QUrl getOriginalURL() const { return originalUrl; }
60 : // Returns the current value of the done flag.
61 0 : inline bool isDone() const { return done; }
62 :
63 : private slots:
64 : // Internal load methods (declared here; their behavior is defined in the implementation file).
65 : void loadInternal(const QUrl &url);
66 : QString* loadInternal(const QString& htmlString, bool handleRefresh);
67 :
68 : // Called when a download error occurs; err carries the error description.
69 : void onDownloadError(QString err);
70 :
71 : // Called when HTTP content has been received; array holds the downloaded bytes.
72 : void onDownloadFinished(QByteArray array);
73 :
74 : // Searches the XHTML'd document for a redirect URL.
75 : // Returns the redirect URL, or the empty string.
76 : QString searchForRedirect(const QString& document);
77 :
78 : // Handles the boilerplate involved with emitting the ready() signal.
79 : void emitReadySignal(QString* document);
80 :
81 : private:
82 : void init();
83 :
84 : SimpleHTTPDownloader downloader; // NOTE(review): presumably performs the HTTP fetch for load(const QUrl&) -- confirm in the .cpp
85 : QString document; // Storage that getDocument() hands out by pointer.
86 : bool handleMetaRefresh; // NOTE(review): presumably stores the constructor's handleMetaRefresh argument -- confirm in the .cpp
87 : int redirectAttempts; // NOTE(review): presumably counts redirects followed so far -- confirm in the .cpp
88 : bool error; // When true, getDocument() returns nullptr instead of the document.
89 : QUrl originalUrl; // Value returned by getOriginalURL().
90 : bool done; // Value returned by isDone().
91 : };
92 :
93 : #endif // WEBPAGEGRABBER_H
|