Line data Source code
1 : #include "NewsParser.h"
2 :
3 : #include <QFile>
4 : #include <QFileInfo>
5 :
6 : #include "../utilities/FangLogging.h"
7 :
8 : #include "ParserXMLWorker.h"
9 :
10 117 : NewsParser::NewsParser(QObject *parent) :
11 : ParserInterface(parent),
12 117 : feed(nullptr), result(OK), networkError(QNetworkReply::NetworkError::NoError),
13 117 : activeManager(&manager),
14 234 : currentReply(nullptr), redirectReply(nullptr),
15 117 : fromCache(false), noParseIfCached(false),
16 117 : redirectAttempts(0),
17 117 : permanentRedirect(false)
18 : {
19 : // Connex0r teh siganls.
20 117 : connect(activeManager, &QNetworkAccessManager::finished,
21 117 : this, &NewsParser::netFinished);
22 :
23 : // Setup the worker object.
24 117 : ParserXMLWorker* worker = new ParserXMLWorker();
25 117 : worker->moveToThread(&workerThread);
26 117 : connect(&workerThread, &QThread::finished, worker, &QObject::deleteLater);
27 117 : connect(this, &NewsParser::triggerDocStart, worker, &ParserXMLWorker::documentStart);
28 117 : connect(this, &NewsParser::triggerDocEnd, worker, &ParserXMLWorker::documentEnd);
29 117 : connect(this, &NewsParser::triggerAddXML, worker, &ParserXMLWorker::addXML);
30 117 : connect(worker, &ParserXMLWorker::done, this, &NewsParser::workerDone);
31 :
32 117 : workerThread.start();
33 117 : }
34 :
35 5 : NewsParser::NewsParser(QNetworkAccessManager* networkManager, QObject *parent) :
36 : ParserInterface(parent),
37 5 : feed(nullptr), result(OK), networkError(QNetworkReply::NetworkError::NoError),
38 5 : activeManager(networkManager ? networkManager : &manager),
39 10 : currentReply(nullptr), redirectReply(nullptr),
40 5 : fromCache(false), noParseIfCached(false),
41 5 : redirectAttempts(0),
42 10 : permanentRedirect(false)
43 : {
44 5 : connect(activeManager, &QNetworkAccessManager::finished,
45 5 : this, &NewsParser::netFinished);
46 :
47 5 : ParserXMLWorker* worker = new ParserXMLWorker();
48 5 : worker->moveToThread(&workerThread);
49 5 : connect(&workerThread, &QThread::finished, worker, &QObject::deleteLater);
50 5 : connect(this, &NewsParser::triggerDocStart, worker, &ParserXMLWorker::documentStart);
51 5 : connect(this, &NewsParser::triggerDocEnd, worker, &ParserXMLWorker::documentEnd);
52 5 : connect(this, &NewsParser::triggerAddXML, worker, &ParserXMLWorker::addXML);
53 5 : connect(worker, &ParserXMLWorker::done, this, &NewsParser::workerDone);
54 :
55 5 : workerThread.start();
56 5 : }
57 :
58 162 : NewsParser::~NewsParser()
59 : {
60 122 : workerThread.quit();
61 122 : workerThread.wait();
62 :
63 122 : delete currentReply;
64 162 : }
65 :
66 5 : void NewsParser::parse(const QUrl& url, bool noParseIfCached,
67 : const QString& ifNoneMatch, const QString& ifModifiedSince)
68 : {
69 : // Reset redirect counter.
70 5 : redirectAttempts = 0;
71 5 : permanentRedirect = false;
72 :
73 5 : parseInternal(url, noParseIfCached, ifNoneMatch, ifModifiedSince);
74 5 : }
75 :
76 9 : void NewsParser::parseInternal(const QUrl& url, bool noParseIfCached,
77 : const QString& ifNoneMatch, const QString& ifModifiedSince)
78 : {
79 9 : initParse(url);
80 :
81 9 : this->noParseIfCached = noParseIfCached;
82 9 : this->condIfNoneMatch = ifNoneMatch;
83 9 : this->condIfModifiedSince = ifModifiedSince;
84 :
85 : // in with the new
86 9 : QNetworkRequest request(url);
87 :
88 : // Sets a 30 second timeout in case the connection is lost or screwy.
89 9 : request.setTransferTimeout(30000);
90 :
91 : // Conditional request headers for ETag/Last-Modified support.
92 9 : if (!ifNoneMatch.isEmpty()) {
93 0 : request.setRawHeader("If-None-Match", ifNoneMatch.toUtf8());
94 : }
95 9 : if (!ifModifiedSince.isEmpty()) {
96 0 : request.setRawHeader("If-Modified-Since", ifModifiedSince.toUtf8());
97 : }
98 :
99 9 : if (currentReply) {
100 4 : currentReply->disconnect(this);
101 4 : currentReply->deleteLater();
102 : }
103 :
104 9 : currentReply = activeManager->get(request);
105 9 : connect(currentReply, &QNetworkReply::readyRead, this, &NewsParser::readyRead);
106 9 : connect(currentReply, &QNetworkReply::metaDataChanged, this, &NewsParser::metaDataChanged);
107 9 : connect(currentReply, &QNetworkReply::errorOccurred, this, &NewsParser::error);
108 9 : }
109 :
110 :
111 77 : void NewsParser::parseFile(const QString &filename)
112 : {
113 77 : initParse();
114 :
115 77 : QFile file(filename);
116 :
117 77 : if (!file.exists()) {
118 0 : qCCritical(logParser) << "NewsParser::parseFile: File does not exist:" << filename;
119 0 : return;
120 : }
121 :
122 77 : if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
123 0 : qCCritical(logParser) << "NewsParser::parseFile: Cannot open file:" << filename;
124 0 : return;
125 : }
126 :
127 77 : QByteArray data = file.readAll();
128 77 : emit triggerAddXML(data);
129 77 : emit triggerDocEnd();
130 77 : }
131 :
132 0 : void NewsParser::error(QNetworkReply::NetworkError ne)
133 : {
134 : Q_UNUSED(ne);
135 0 : currentReply->disconnect(this);
136 0 : currentReply->deleteLater();
137 0 : currentReply = 0;
138 :
139 0 : result = NewsParser::NETWORK_ERROR;
140 0 : networkError = ne;
141 0 : emit done();
142 0 : }
143 :
144 :
145 5 : void NewsParser::readyRead()
146 : {
147 5 : int statusCode = currentReply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt();
148 :
149 : // 304 Not Modified: Content hasn't changed, nothing to do.
150 5 : if (statusCode == 304) {
151 0 : currentReply->disconnect(this);
152 0 : currentReply->deleteLater();
153 0 : currentReply = nullptr;
154 :
155 0 : result = NewsParser::NOT_MODIFIED;
156 0 : emit done();
157 0 : return;
158 : }
159 :
160 5 : if (statusCode >= 200 && statusCode < 300) {
161 5 : QByteArray data = currentReply->readAll();
162 5 : emit triggerAddXML(data);
163 5 : }
164 : }
165 :
166 9 : void NewsParser::metaDataChanged()
167 : {
168 : // Capture ETag and Last-Modified response headers.
169 18 : if (currentReply->hasRawHeader("ETag")) {
170 0 : respEtag = QString::fromUtf8(currentReply->rawHeader("ETag"));
171 : }
172 18 : if (currentReply->hasRawHeader("Last-Modified")) {
173 0 : respLastModified = QString::fromUtf8(currentReply->rawHeader("Last-Modified"));
174 : }
175 :
176 18 : QUrl redirectionTarget = currentReply->attribute(
177 9 : QNetworkRequest::RedirectionTargetAttribute).toUrl();
178 :
179 9 : if (redirectionTarget.isValid()) {
180 : // Guard against unlimited redirects.
181 4 : if (redirectAttempts >= MAX_PARSER_REDIRECTS) {
182 0 : qCDebug(logParser) << "NewsParser: Maximum redirects reached, aborting";
183 0 : currentReply->disconnect(this);
184 0 : currentReply->deleteLater();
185 0 : currentReply = nullptr;
186 :
187 0 : result = NewsParser::NETWORK_ERROR;
188 0 : networkError = QNetworkReply::TooManyRedirectsError;
189 0 : emit done();
190 0 : return;
191 : }
192 :
193 4 : int statusCode = currentReply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt();
194 4 : if (statusCode == 301 || statusCode == 308) {
195 2 : permanentRedirect = true;
196 : }
197 :
198 8 : qCDebug(logParser) << "Redirect:" << redirectionTarget.toString();
199 4 : redirectAttempts++;
200 4 : redirectReply = currentReply;
201 : // Don't send conditional headers on redirect -- the new URL may be different.
202 4 : parseInternal(redirectionTarget, noParseIfCached);
203 : }
204 :
205 18 : if (currentReply->attribute(
206 9 : QNetworkRequest::SourceIsFromCacheAttribute).isValid()) {
207 0 : if (currentReply->attribute(
208 0 : QNetworkRequest::SourceIsFromCacheAttribute).toBool()) {
209 0 : if (noParseIfCached) {
210 : // Early exit for cache.
211 0 : currentReply->disconnect(this);
212 0 : currentReply->deleteLater();
213 0 : currentReply = 0;
214 :
215 0 : result = NewsParser::OK;
216 0 : emit done();
217 :
218 0 : return;
219 : }
220 : }
221 : }
222 9 : }
223 :
224 0 : NewsParser::ParseResult NewsParser::getResult()
225 : {
226 0 : return result;
227 : }
228 :
229 77 : RawFeed* NewsParser::getFeed()
230 : {
231 77 : return result == NewsParser::OK ? feed : nullptr;
232 : }
233 :
234 9 : void NewsParser::netFinished(QNetworkReply *reply)
235 : {
236 9 : if (redirectReply == reply) {
237 4 : return; // This was the previous redirect.
238 : }
239 :
240 : // Remember this URL.
241 5 : finalFeedURL = reply->url();
242 :
243 : // Tell the worker that we're done.
244 5 : emit triggerDocEnd();
245 : }
246 :
247 82 : void NewsParser::workerDone(RawFeed* rawFeed)
248 : {
249 82 : if (result != NewsParser::IN_PROGRESS) {
250 : // Already emitted a finished signal. Nothing to dooooo.
251 0 : return;
252 : }
253 :
254 82 : if (rawFeed) {
255 82 : feed = rawFeed;
256 :
257 : // This means we saw... something. Do a sanity check here to
258 : // make sure what we found was an actual feed.
259 82 : if (feed->items.size() > 0 || feed->title != "") {
260 82 : feed->url = finalFeedURL;
261 :
262 82 : result = NewsParser::OK;
263 82 : emit done();
264 :
265 82 : return; // Early exit on SUCCESS!! (yay)
266 : }
267 : }
268 :
269 : // What we found must not have been an RSS/Atom feed.
270 0 : result = NewsParser::PARSE_ERROR;
271 0 : emit done();
272 : }
273 :
274 86 : void NewsParser::initParse(const QUrl& url)
275 : {
276 86 : result = NewsParser::IN_PROGRESS;
277 86 : networkError = QNetworkReply::NetworkError::NoError;
278 86 : finalFeedURL = url;
279 86 : respEtag = QString();
280 86 : respLastModified = QString();
281 86 : emit triggerDocStart();
282 86 : }
|