Line data Source code
1 : #ifndef FEEDDISCOVERY_H
2 : #define FEEDDISCOVERY_H
3 :
4 : #include <QList>
5 : #include <QObject>
6 : #include <QString>
7 : #include <QUrl>
8 :
9 : #include <QSimpleStateMachine/QSimpleStateMachine.h>
10 :
11 : #include "../parser/ParserInterface.h"
12 : #include "../parser/RawFeed.h"
13 : #include "../parser/BatchNewsParser.h"
14 : #include "../utilities/WebPageGrabber.h"
15 : #include "../FangObject.h"
16 :
17 : /**
18 : * @brief Attempts to match a user-submitted URL like "bob.com" to an actual news feed.
19 : * This is done by massaging the URL so that Qt can fetch a document. If that document is
20 : * a web page, we search the HTML for a link to an RSS or Atom feed. Then we check
21 : * the linked document to see if it's an RSS or Atom feed.
22 : *
23 : * HTTP and HTML redirects are handled.
24 : *
25 : * State machine diagram:
26 : *
27 : * CHECK_FEED
28 : * | \- ERROR
29 : * |
30 : * TRY_FEED
31 : * | \- FEED_FOUND
32 : * |
33 : * WEB_GRABBER
34 : * | \- ERROR
35 : * |
36 : * VALIDATE_FEEDS
37 : * | \- ERROR
38 : * |
39 : * FEED_FOUND
40 : */
41 : class FeedDiscovery : public FangObject
42 : {
43 : Q_OBJECT
44 :
45 : private:
46 :
47 : enum FeedDiscoveryState { // States driven through the QSimpleStateMachine member `machine`
48 : CHECK_FEED,
49 : TRY_FEED, // presumably handled by onTryFeed() -- confirm in the .cpp
50 : WEB_GRABBER, // presumably handled by onWebGrabber() -- confirm in the .cpp
51 : VALIDATE_FEEDS, // Bulk feed validation; presumably handled by onValidateFeeds()
52 : FEED_FOUND, // presumably handled by onFeedFound() -- confirm in the .cpp
53 : FEED_ERROR // presumably handled by onError(); see also reportError()
54 : };
55 :
56 : public:
57 : /**
58 : * @brief One discovered feed plus its metadata. Default-constructed with feed == nullptr and validated == false.
59 : */
60 : struct DiscoveredFeed {
61 : QUrl url; // Feed URL
62 : QString title; // Feed title (from parsed feed or URL)
63 : QString content; // Downloaded feed content (for lazy parsing)
64 : RawFeed* feed; // Parsed feed (nullptr if not yet parsed); ownership is not visible in this header
65 : bool validated; // Has this feed been successfully parsed?
66 :
67 19 : DiscoveredFeed() : feed(nullptr), validated(false) {} // Starts unparsed and unvalidated; url/title/content are default-constructed
68 : };
69 :
70 : explicit FeedDiscovery(QObject *parent = nullptr, // Every argument defaults to nullptr; how null is handled is in the .cpp (not shown)
71 : ParserInterface* firstParser = nullptr, // presumably stored in parserFirstTry -- confirm in the .cpp
72 : ParserInterface* secondParser = nullptr, // NOTE(review): no matching member in this header -- confirm it is still used
73 : WebPageGrabber* pageGrabber = nullptr, // presumably stored in the pageGrabber member -- confirm
74 : BatchNewsParser* feedParser = nullptr); // presumably stored in the feedParser member -- confirm
75 : virtual ~FeedDiscovery();
76 :
77 : /**
78 : * @return The _error flag: after done(), true if there was an error. NOTE(review): accessor is not const-qualified.
79 : */
80 27 : bool error() { return _error; }
81 :
82 : /**
83 : * @return After done(), the error string, if there was an error. Returns _errorString without checking the error flag.
84 : */
85 7 : QString errorString() { return _errorString; }
86 :
87 : /**
88 : * @return The feed URL (_feedURL), or an empty QUrl("") if the error flag is set.
89 : * For backward compatibility: returns first validated feed.
90 : */
91 17 : QUrl feedURL() { return _error ? QUrl("") : _feedURL; }
92 :
93 : /**
94 : * @return The raw feed (_feedResult), or nullptr if the error flag is set.
95 : * For backward compatibility: returns first validated feed.
96 : */
97 1 : RawFeed* feedResult() { return _error ? nullptr : _feedResult; }
98 :
99 : /**
100 : * @return A by-value copy of the list of all discovered feeds (may be empty if error or single-feed mode)
101 : */
102 2 : QList<DiscoveredFeed> discoveredFeeds() const { return _discoveredFeeds; }
103 :
104 : /**
105 : * @return Number of entries in the discovered-feed list (0 = error or single-feed mode, 1+ = multi-feed)
106 : */
107 2 : int feedCount() const { return _discoveredFeeds.count(); }
108 :
109 : signals:
110 :
111 : /**
112 : * @brief Completion signal. Check error(); if it returns false, get feedURL()
113 : * @param feedDiscovery The discovery object that finished (presumably the emitter itself -- confirm in the .cpp)
114 : */
115 : void done(FeedDiscovery* feedDiscovery);
116 :
117 : public slots:
118 :
119 : /**
120 : * @brief Call this with a feed URL to check to get started! Wait for done()
121 : * @param sURL The URL text to check. Declared virtual, so a subclass can override it.
122 : */
123 : virtual void checkFeed(QString sURL);
124 :
125 : /**
126 : * @brief Try to find RSS and Atom feed(s), if available.
127 : * External use: Intended for use in unit tests.
128 : * @param document XHTML text to search. Returns a list of strings, presumably the feed URLs found -- confirm in the .cpp
129 : */
130 : QList<QString> parseFeedsFromXHTML(const QString& document);
131 :
132 : private slots:
133 :
134 : // State change slots (names parallel the FeedDiscoveryState values):
135 : void onTryFeed();
136 : void onFeedFound();
137 : void onWebGrabber();
138 : void onValidateFeeds(); // Bulk feed validation
139 : void onError();
140 :
141 : // Parser/BulkParser completion slots:
142 : void onFirstParseDone();
143 : void onFeedParserReady();
144 :
145 : // WebPageGrabber slots:
146 : void onPageGrabberReady(WebPageGrabber* grabber, QString* document); // document arrives by pointer; whether it can be null is not visible here
147 :
148 : protected:
149 : ParserInterface* parserFirstTry; // Protected, so subclasses can reach it; ownership is not visible in this header
150 : WebPageGrabber* pageGrabber; // For fetching HTML pages
151 : BatchNewsParser* feedParser; // For bulk feed parsing
152 :
153 : private:
154 : // Sets the error flag, error string, and triggers the ERROR state.
155 : void reportError(QString errorString);
156 :
157 : QSimpleStateMachine machine;
158 :
159 : QUrl _feedURL; // Returned by feedURL() when the error flag is clear
160 : bool _error; // Returned by error(); gates feedURL() and feedResult()
161 : QString _errorString; // Returned by errorString()
162 :
163 : RawFeed* _feedResult; // Returned by feedResult() when the error flag is clear
164 :
165 : // Multi-feed discovery state
166 : QList<DiscoveredFeed> _discoveredFeeds; // All discovered feeds
167 : QList<QUrl> _sortedFeedURLs; // Feed URLs to validate (sorted by path length)
168 : };
169 :
170 : #endif // FEEDDISCOVERY_H
|