Line data Source code
1 : #include "FaviconGrabber.h"
2 : #include <QString>
3 : #include <QStringList>
4 : #include <QImage>
5 : #include <QXmlStreamReader>
6 :
7 : #include "NetworkUtilities.h"
8 : #include "FangLogging.h"
9 :
10 4 : FaviconGrabber::FaviconGrabber(QObject *parent, QNetworkAccessManager* networkManager) :
11 : FangObject(parent),
12 4 : repliesWaiting(0),
13 4 : manager(networkManager ? networkManager : new FangNetworkAccessManager(this)),
14 4 : ownsManager(networkManager == nullptr),
15 8 : webGrabber(true, 5000, this, networkManager)
16 : {
17 : // Set up our state machine.
18 8 : machine.addStateChange(START, WEB_GRABBER, [this]() { onWebGrabber(); });
19 8 : machine.addStateChange(WEB_GRABBER, CHECK_ICONS, [this]() { onCheckIcons(); });
20 8 : machine.addStateChange(CHECK_ICONS, PICK_BEST, [this]() { onPickBest(); });
21 :
22 4 : machine.addStateChange(-1, GRAB_ERROR, [this]() { onError(); }); // Many errors, one slot.
23 :
24 : // Signals!
25 4 : connect(manager, &QNetworkAccessManager::finished, this, &FaviconGrabber::onRequestFinished);
26 4 : connect(&webGrabber, &WebPageGrabber::ready, this, &FaviconGrabber::onWebGrabberReady);
27 4 : }
28 :
29 4 : void FaviconGrabber::find(const QUrl &url)
30 : {
31 4 : urlsToCheck.clear();
32 4 : faviconReplies.clear();
33 4 : location = url;
34 4 : machine.start(START);
35 :
36 : // Make a list of "root" favicons.
37 4 : QUrl host = NetworkUtilities::getHost(location);
38 24 : const QStringList extensions{"ico", "jpg", "jpeg", "png", "gif"};
39 :
40 : // Add each extension to our list.
41 24 : for (const QString& ext : extensions) {
42 20 : QUrl toCheck(host);
43 20 : toCheck.setPath("/favicon." + ext);
44 20 : urlsToCheck << toCheck;
45 20 : }
46 :
47 4 : machine.setState(WEB_GRABBER);
48 8 : }
49 :
50 4 : void FaviconGrabber::onWebGrabber()
51 : {
52 : // Check for favicons embedded in the HTML.
53 : // We look at the main page rather than the feed.
54 4 : webGrabber.load(NetworkUtilities::getHost(location));
55 4 : }
56 :
57 4 : void FaviconGrabber::onCheckIcons()
58 : {
59 4 : if (urlsToCheck.isEmpty()) {
60 0 : machine.setState(GRAB_ERROR);
61 0 : return;
62 : }
63 :
64 : // Pop each URL off the list and check it
65 4 : repliesWaiting = 0;
66 28 : while (!urlsToCheck.isEmpty()) {
67 24 : repliesWaiting++;
68 :
69 24 : QUrl url = urlsToCheck.takeFirst();
70 :
71 : // If it's not a fully formed URL, fill it out.
72 24 : if (url.isValid() && url.isRelative()) {
73 0 : QUrl newUrl = location;
74 0 : newUrl.setPath(url.path());
75 0 : url = newUrl;
76 0 : }
77 :
78 24 : QNetworkRequest request(url);
79 24 : QNetworkReply* reply = manager->get(request);
80 24 : faviconReplies.insert(reply); // Track this reply
81 24 : }
82 : }
83 :
84 4 : void FaviconGrabber::onPickBest()
85 : {
86 4 : if (imagesToCheck.isEmpty()) {
87 0 : machine.setState(GRAB_ERROR);
88 4 : return;
89 : }
90 :
91 4 : int topTotalPixels = 0;
92 4 : QUrl topURL;
93 :
94 : // Go over all the images. Find the one with the max total pixels.
95 8 : for (const auto& pair : std::as_const(imagesToCheck)) {
96 4 : const QImage& img = pair.second;
97 4 : int totalPixels = img.width() * img.height();
98 4 : if (totalPixels > topTotalPixels) {
99 4 : topTotalPixels = totalPixels;
100 4 : topURL = pair.first;
101 : }
102 : }
103 :
104 4 : if (topTotalPixels > 0) {
105 4 : emit finished(topURL);
106 4 : return;
107 : }
108 :
109 0 : machine.setState(GRAB_ERROR);
110 4 : }
111 :
112 0 : void FaviconGrabber::onError()
113 : {
114 0 : emit finished(QUrl()); // invalid URL
115 0 : }
116 :
117 28 : void FaviconGrabber::onRequestFinished(QNetworkReply * reply)
118 : {
119 : // Only process replies that belong to us (ignore WebPageGrabber's replies)
120 28 : if (!faviconReplies.contains(reply)) {
121 4 : return;
122 : }
123 :
124 : // Remove from our tracking set
125 24 : faviconReplies.remove(reply);
126 :
127 24 : if (!reply->error()) {
128 24 : QImage img;
129 24 : QByteArray data = reply->readAll();
130 48 : qCDebug(logFavicon) << "Trying to load image from" << reply->url() << "size:" << data.size();
131 :
132 24 : if (img.loadFromData(data)) {
133 8 : qCDebug(logFavicon) << "Successfully loaded image:" << img.width() << "x" << img.height();
134 4 : imagesToCheck << QPair<QUrl, QImage>(reply->url(), img);
135 : } else {
136 40 : qCDebug(logFavicon) << "Failed to load image from data";
137 : }
138 24 : }
139 :
140 24 : repliesWaiting--;
141 48 : qCDebug(logFavicon) << "repliesWaiting:" << repliesWaiting << "imagesToCheck.size():" << imagesToCheck.size();
142 24 : if (!repliesWaiting) {
143 4 : machine.setState(PICK_BEST);
144 : }
145 : }
146 :
147 4 : void FaviconGrabber::onWebGrabberReady(WebPageGrabber* grabber, QString *document)
148 : {
149 : Q_UNUSED(grabber);
150 :
151 : // Ignore responses that arrive after we've already moved past WEB_GRABBER state
152 : // (e.g., multiple async responses from favicon URLs being parsed as HTML)
153 4 : if (machine.getState() != WEB_GRABBER) {
154 0 : return;
155 : }
156 :
157 : // Could indicate no internet.
158 4 : if (document == nullptr || document->isEmpty()) {
159 0 : machine.setState(CHECK_ICONS);
160 :
161 0 : return;
162 : }
163 :
164 4 : findIcons(*document);
165 :
166 4 : machine.setState(CHECK_ICONS);
167 : }
168 :
169 4 : void FaviconGrabber::findIcons(const QString& document)
170 : {
171 : // Examples of what we're looking for:
172 : // <link rel="apple-touch-icon" href="/apple-touch-icon.png" />
173 : // <link rel="icon" href="/favicon.ico" type="image/x-icon" />
174 : // <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />
175 :
176 4 : QXmlStreamReader xml;
177 4 : xml.addData(document);
178 :
179 80 : while (!xml.atEnd()) {
180 76 : xml.readNext();
181 :
182 76 : if (xml.isStartElement()) {
183 24 : QString tagName = xml.name().toString().toLower();
184 24 : if (tagName == "body") {
185 : // We're done with the header, so bail.
186 4 : return;
187 : }
188 :
189 20 : if (tagName == "link") {
190 4 : QXmlStreamAttributes attributes = xml.attributes();
191 8 : if (attributes.hasAttribute("rel") && attributes.hasAttribute("href")) {
192 4 : QString rel = attributes.value("", "rel").toString().toLower();
193 4 : if (rel == "apple-touch-icon" || rel == "icon" || rel == "shortcut icon") {
194 8 : urlsToCheck << QUrl(attributes.value("", "href").toString());
195 : }
196 4 : }
197 4 : }
198 24 : }
199 : }
200 4 : }
|