LCOV - code coverage report
Current view: top level - src/parser - ParserXMLWorker.cpp (source / functions) Coverage Total Hit
Test: coverage.info.cleaned Lines: 92.4 % 353 326
Test Date: 2026-03-23 10:19:47 Functions: 100.0 % 17 17

            Line data    Source code
       1              : #include "ParserXMLWorker.h"
       2              : #include <QMap>
       3              : #include <QtCore/qtimezone.h>
       4              : #include "../utilities/ErrorHandling.h"
       5              : #include "../utilities/FangLogging.h"
       6              : 
       7              : // Some feeds (e.g. excelsior.com.mx) double-escape CDATA markers, producing
       8              : // literal "<![CDATA[...]]>" text instead of actual CDATA sections. Strip them.
       9         6672 : static QString stripEscapedCDATA(const QString& text)
      10              : {
      11         6672 :     QString trimmed = text.trimmed();
      12         6672 :     if (trimmed.startsWith("<![CDATA[") && trimmed.endsWith("]]>")) {
      13            0 :         return trimmed.mid(9, trimmed.length() - 12).trimmed();
      14              :     }
      15         6672 :     return text;
      16         6672 : }
      17              : 
      18          122 : ParserXMLWorker::ParserXMLWorker(QObject *parent) :
      19          122 :     FangObject(parent), feed(nullptr), currentItem(nullptr), isValid(false), inAtomXHTML(false)
      20              : {
      21          122 : }
      22              : 
      23          244 : ParserXMLWorker::~ParserXMLWorker()
      24              : {
      25          122 :     delete feed;
      26          244 : }
      27              : 
      28           86 : void ParserXMLWorker::documentStart()
      29              : {
      30              :     // Make a new feed!  Yay!
      31           86 :     delete feed;
      32           86 :     feed = new RawFeed();
      33           86 :     isValid = true;
      34              :     
      35           86 :     resetParserVars();
      36           86 : }
      37              : 
      38           82 : void ParserXMLWorker::documentEnd()
      39              : {
      40           82 :     if (isValid) {
      41           82 :         if (feed->items.size() == 0) {
      42              :             // Edge case: we typically save the summary when we encounter the first item. This
      43              :             // handles the case where they were no items but we might have a summary.
      44            1 :             saveSummary();
      45              :         }
      46           82 :         emit done(feed);
      47              :     }
      48              :     
      49              :     // If it's not valid, we already emitted a signal.
      50           82 : }
      51              : 
      52           82 : void ParserXMLWorker::addXML(QByteArray data)
      53              : {
      54           82 :     if (!isValid) {
      55            0 :         return;
      56              :     }
      57              :     
      58           82 :     xml.addData(data);
      59              :     
      60       101247 :     while (!xml.atEnd()) {
      61              :         // Grab the next thingie.
      62       101165 :         xml.readNext();
      63              :         
      64       101165 :         if (xml.isStartElement()) {
      65        27177 :             elementStart();
      66        73988 :         } else if (xml.isEndElement()) {
      67        27177 :             elementEnd();
      68        46811 :         } else if (xml.isCharacters() && !xml.isWhitespace()) {
      69        20313 :             elementContents();
      70              :         }
      71              :     }
      72              :     
      73              :     // Standards need to be a bit lax for RSS.
      74           82 :     if (xml.error() && xml.error() != QXmlStreamReader::PrematureEndOfDocumentError &&
      75            0 :             xml.error() != QXmlStreamReader::NotWellFormedError) {
      76            0 :         isValid = false;
      77            0 :         qCWarning(logParser) << "XML ERROR:" << xml.lineNumber() << ": " << xml.errorString();
      78            0 :         emit done(nullptr);
      79              :     }
      80              :     
      81              : }
      82              : 
      83              : 
      84        27177 : void ParserXMLWorker::elementStart()
      85              : {
      86        27177 :     QString tagName = xml.name().toString().toLower();
      87              :     
      88              :     // Look for start of entries.
      89              :     //qDebug() << "XML node: " << xml.name().toString() << " " << xml.prefix().toString();
      90        27177 :     if ((tagName == "item" || tagName == "entry") && !inAtomXHTML) {
      91              :         
      92         2224 :         if (urlHref.isEmpty()) {
      93         4364 :             urlHref = xml.attributes().value("rss:about").toString();
      94              :         }
      95              :         
      96         2224 :         if (numItems == 0) {
      97              :             // Oh, first item?  Assume we've seen the summary then.
      98           81 :             saveSummary();
      99              :         }
     100              :         
     101         2224 :         currentItem = new RawNews(feed);
     102         2224 :         numItems++;
     103        25775 :     } else if ((tagName == "content" || tagName == "summary") && 
     104        26597 :                xml.attributes().value("type").toString().toLower() == "xhtml") {
     105              :         // Atom has a crappy feature where you can just stick unescaped xhtml
     106              :         // into the Atom's DOM.  Someone at Google must not believe in SAX
     107              :         // parsers, I guess?
     108           99 :         inAtomXHTML = true;
     109        24854 :     } else if (inAtomXHTML) {
     110              :         // Build a string of the tag's elements.
     111         2802 :         QString elements = "";
     112         2802 :         QXmlStreamAttributes attributes = xml.attributes();
     113         4204 :         for (QXmlStreamAttribute attribute : attributes) {
     114         2804 :             elements += " " + attribute.name().toString() + "=\""
     115         4206 :                         + attribute.value().toString() + "\"";
     116         1402 :         }
     117              :         
     118              :         // Mash the tag together.
     119         2802 :         content += "<" + xml.qualifiedName().toString() + elements + ">";
     120              :         
     121              :         // Early exit!
     122         2802 :         return;
     123         2802 :     }
     124              :     
     125        24375 :     currentTag = tagName;
     126        24375 :     currentPrefix = xml.prefix().toString().toLower();
     127        48750 :     hasType = xml.attributes().hasAttribute("type");
     128              : 
     129              :     // Podcast detection: only flag itunes elements that are specific to actual
     130              :     // podcast feeds. Many non-podcast feeds (e.g. Substack blogs) include generic
     131              :     // itunes metadata like itunes:owner, itunes:author, and itunes:block.
     132        24375 :     if (currentPrefix == "itunes") {
     133         1441 :         if (currentTag == "duration" || currentTag == "episode"
     134          655 :                 || currentTag == "episodetype" || currentTag == "season"
     135         1441 :                 || currentTag == "explicit" || currentTag == "category") {
     136          264 :             hasPodcastSignals = true;
     137              :         }
     138              :     }
     139              : 
     140              :     // Podcast detection: audio enclosures.
     141        24375 :     if (currentTag == "enclosure") {
     142          796 :         QString type = xml.attributes().value("type").toString().toLower();
     143          398 :         if (type.startsWith("audio/")) {
     144          254 :             hasPodcastSignals = true;
     145              :         }
     146          398 :     }
     147              : 
     148              :     // Media RSS image extraction (media:thumbnail and media:content).
     149        24375 :     if (currentItem != nullptr && currentPrefix == "media") {
     150         1441 :         if (currentTag == "thumbnail") {
     151         1204 :             QString url = xml.attributes().value("url").toString();
     152         1204 :             int width = xml.attributes().value("width").toString().toInt();
     153          602 :             if (!url.isEmpty() && (mediaImageURL.isEmpty() || width > mediaImageWidth)) {
     154          602 :                 mediaImageURL = url;
     155          602 :                 mediaImageWidth = width;
     156              :             }
     157         1441 :         } else if (currentTag == "content") {
     158          834 :             QString type = xml.attributes().value("type").toString().toLower();
     159          417 :             if (type.startsWith("image/")) {
     160          470 :                 QString url = xml.attributes().value("url").toString();
     161          470 :                 int width = xml.attributes().value("width").toString().toInt();
     162          235 :                 if (!url.isEmpty() && (mediaImageURL.isEmpty() || width > mediaImageWidth)) {
     163          173 :                     mediaImageURL = url;
     164          173 :                     mediaImageWidth = width;
     165              :                 }
     166          235 :             }
     167          417 :         }
     168              :     }
     169              : 
     170        26759 :     if (currentTag == "link" && urlHref.isEmpty() && xml.attributes().hasAttribute("href")) {
     171              :         // Used by atom feeds to grab the first link.
     172          440 :         urlHref = xml.attributes().value("href").toString();
     173              :     }
     174              :     
     175              :     // Add this new tag to our stack. :)
     176        24375 :     tagStack.push(tagName);
     177        27177 : }
     178              : 
     179        27177 : void ParserXMLWorker::elementEnd()
     180              : {
     181        27177 :     if (!inAtomXHTML) {
     182        24276 :         tagStack.pop(); // Pop our tag stack, we're through with this one!
     183              :     }
     184              :     
     185        27177 :     QString tagName = xml.name().toString().toLower();
     186              :     
     187        27177 :     if ((tagName == "item" || tagName == "entry") && !inAtomXHTML) {
     188              :         //qDebug() << "End element:" << xml.name().toString();
     189         2224 :         if (currentItem == nullptr) {
     190              :             // Throw some kinda error, this can't happen.
     191            0 :             qCDebug(logParser) << "Current item is null!";
     192            0 :             qCDebug(logParser) << "Current title: " << title;
     193            0 :             qCDebug(logParser) << "Xml element: " << tagName;
     194              :         }
     195              :         
     196              :         // Figure out which date to use.
     197         2224 :         QString timestamp;
     198         2224 :         if (!pubdate.trimmed().isEmpty()) {
     199         1977 :             timestamp = pubdate;
     200          247 :         } else if (!lastbuilddate.trimmed().isEmpty()) {
     201            0 :             timestamp = lastbuilddate;
     202          247 :         } else if (!created.trimmed().isEmpty()) {
     203           15 :             timestamp = created;
     204          232 :         } else if (!date.trimmed().isEmpty()) {
     205           69 :             timestamp = date;
     206          163 :         } else if (!updated.trimmed().isEmpty()) {
     207          163 :             timestamp = updated;
     208              :         }
     209              :         
     210              :         // Determine the GUID.
     211         2224 :         QString myGuid;
     212         2224 :         if (!id.trimmed().isEmpty()) {
     213          178 :             myGuid = id.trimmed();
     214         2046 :         } else if (!guid.trimmed().isEmpty()) {
     215         1562 :             myGuid = guid.trimmed();
     216          484 :         } else if (!urlData.trimmed().isEmpty()) {
     217          484 :             myGuid = urlData.trimmed();
     218              :         } else {
     219            0 :             myGuid = urlHref.trimmed();
     220              :         }
     221              : 
     222              :         // Skip items without a GUID - malformed feed
     223         2224 :         if (myGuid.isEmpty()) {
     224            0 :             qCWarning(logParser) << "ParserXMLWorker: RSS/Atom item missing GUID/URL, skipping item";
     225            0 :             qCWarning(logParser) << "  Title:" << title;
     226            0 :             delete currentItem;
     227            0 :             currentItem = nullptr;
     228              : 
     229              :             // Clear all strings for next item
     230            0 :             author = title = subtitle = content = QString();
     231            0 :             urlData = urlHref = guid = id = date = updated = timestamp = QString();
     232            0 :             return;
     233              :         }
     234              : 
     235              :         // Item space.
     236         2224 :         currentItem->author = author;
     237         2224 :         currentItem->title = stripEscapedCDATA(title);
     238         2224 :         currentItem->description = stripEscapedCDATA(subtitle);
     239         2224 :         currentItem->content = stripEscapedCDATA(content);
     240              : 
     241         2224 :         currentItem->mediaImageURL = mediaImageURL;
     242              : 
     243         2224 :         currentItem->url = urlData.isEmpty() ? QUrl(urlHref) : QUrl(urlData);
     244         2224 :         currentItem->timestamp = dateFromFeedString(timestamp);
     245         2224 :         currentItem->guid = myGuid;
     246              :         
     247              :         // Okay, give it up. :(
     248         2224 :         if (!currentItem->timestamp.isValid()) {
     249            0 :             qCDebug(logParser) << "Time string: " << timestamp;
     250            0 :             qCDebug(logParser) << "invalid date!";
     251              :         }
     252              :         
     253              :         
     254         2224 :         feed->items.append(currentItem);
     255         2224 :         feed->isPodcast = feed->isPodcast || hasPodcastSignals;
     256         2224 :         currentItem = nullptr;
     257              : 
     258              :         // Clear all strings.
     259         2224 :         title = "";
     260         2224 :         urlHref = "";
     261         2224 :         urlData = "";
     262         2224 :         subtitle = "";
     263         2224 :         pubdate = "";
     264         2224 :         lastbuilddate = "";
     265         2224 :         created = "";
     266         2224 :         date = "";
     267         2224 :         updated = "";
     268         2224 :         author = "";
     269         2224 :         content = "";
     270         2224 :         guid = "";
     271         2224 :         id = "";
     272         2224 :         mediaImageURL = "";
     273         2224 :         mediaImageWidth = 0;
     274        27177 :     } else if (tagName == "content" || tagName == "summary") {
     275              :         // Just accept that this is the end of one of these:
     276              :         // <contents type="xhtml">
     277          822 :         if (inAtomXHTML) {
     278           99 :             inAtomXHTML = false;
     279           99 :             tagStack.pop(); // We didn't do this earlier, you see.
     280              :         }
     281              :     }
     282              :     
     283        27177 :     if (inAtomXHTML) {
     284              :         // SLORG we need to add this tag to the contents.
     285              :         
     286              :         // TODO: Is there a better way to do this?!
     287         2802 :         content += "</" + xml.qualifiedName().toString() + ">";
     288              :     }
     289        27177 : }
     290              : 
     291        20313 : void ParserXMLWorker::elementContents()
     292              : {
     293        20313 :     if (inAtomXHTML) {
     294              :         // Atom sucks!
     295         1860 :         content += xml.text().toString();
     296              :         
     297         1860 :         return; // Early exit.
     298              :     }
     299              :     
     300        18453 :     QString parentTag = getTagStackAt(1);
     301        18453 :     if (parentTag == "item" || parentTag == "entry") {
     302              :         //
     303              :         // Inside a news item.
     304              :         //
     305              :         
     306        16940 :         if (currentTag == "title" && currentPrefix == "") {
     307         2224 :             title += xml.text().toString();
     308        14716 :         } else if (currentTag == "link" && currentPrefix == "") {
     309         2046 :             urlData += xml.text().toString();
     310        12670 :         } else if (currentTag == "description" || currentTag == "summary") {
     311         2229 :             subtitle += xml.text().toString();
     312        10441 :         } else if (currentTag == "name"
     313        10441 :                    || (currentTag == "creator" && currentPrefix == "dc")) {
     314          670 :             author += xml.text().toString();
     315         9771 :         } else if (currentTag == "pubdate") {
     316         1977 :             pubdate += xml.text().toString();
     317         7794 :         } else if (currentTag == "lastbuilddate") {
     318            0 :             lastbuilddate += xml.text().toString();
     319         7794 :         } else if (currentTag == "created") {
     320           15 :             created += xml.text().toString();
     321         7779 :         } else if (currentTag == "updated") {
     322          188 :             updated += xml.text().toString();
     323         7591 :         } else if (currentTag == "date") {
     324          154 :             date += xml.text().toString();
     325         7437 :         } else if (currentTag == "guid") {
     326         1562 :             guid += xml.text().toString();
     327         5875 :         } else if (currentTag == "id") {
     328          178 :             id += xml.text().toString();
     329         5962 :         } else if ((currentTag == "encoded" && currentPrefix == "content")
     330         5962 :                    || (currentTag == "content" && hasType)) {
     331          320 :             content += xml.text().toString();
     332              :         }
     333         1513 :     } else if (parentTag == "channel" || parentTag == "feed") {
     334              :         //
     335              :         // Top level items.
     336              :         //
     337              :         
     338          523 :         if (currentTag == "title" && currentPrefix == "") {
     339           82 :             title += xml.text().toString();
     340          441 :         } else if (currentTag == "link" && currentPrefix == "") {
     341           74 :             urlData += xml.text().toString();
     342          367 :         } else if (currentTag == "description" || currentTag == "summary") {
     343           57 :             subtitle += xml.text().toString();
     344              :         }
     345              :     }
     346        18453 : }
     347              : 
     348           86 : void ParserXMLWorker::resetParserVars()
     349              : {
     350           86 :     xml.clear();
     351              : 
     352           86 :     numItems = 0;
     353           86 :     currentTag = "";
     354           86 :     currentPrefix = "";
     355           86 :     urlHref = "";
     356           86 :     title = "";
     357           86 :     subtitle = "";
     358           86 :     content = "";
     359           86 :     pubdate = "";
     360           86 :     lastbuilddate = "";
     361           86 :     created = "";
     362           86 :     updated = "";
     363           86 :     date = "";
     364           86 :     author = "";
     365           86 :     guid = "";
     366           86 :     id = "";
     367           86 :     mediaImageURL = "";
     368           86 :     mediaImageWidth = 0;
     369           86 :     hasType = false;
     370           86 :     hasPodcastSignals = false;
     371           86 :     inAtomXHTML = false;
     372           86 :     tagStack.clear();
     373           86 : }
     374              : 
     375           82 : void ParserXMLWorker::saveSummary()
     376              : {
     377              :     // Global space.
     378           82 :     feed->title = title;
     379           82 :     feed->subtitle = subtitle;
     380           82 :     feed->siteURL = urlData.isEmpty() ? QUrl(urlHref) : QUrl(urlData);
     381           82 :     feed->isPodcast = hasPodcastSignals;
     382              : 
     383              :     // Clear all local strings.
     384           82 :     title = "";
     385           82 :     urlHref = "";
     386           82 :     urlData = "";
     387           82 :     subtitle = "";
     388           82 :     pubdate = "";
     389           82 :     lastbuilddate = "";
     390           82 :     updated = "";
     391           82 :     date = "";
     392           82 :     author = "";
     393           82 :     content = "";
     394           82 :     guid = "";
     395           82 :     id = "";
     396           82 :     mediaImageURL = "";
     397           82 :     mediaImageWidth = 0;
     398           82 : }
     399              : 
     400              : 
     401         2224 : QDateTime ParserXMLWorker::dateFromFeedString(const QString& _timestamp)
     402              : {
     403         2224 :     QDateTime ret; // Defaults to invalid timestamp.
     404              :     
     405              :     // Come up with a few versions of the time stamp.
     406         2224 :     QString timestamp = _timestamp.trimmed();
     407         2224 :     yearFix(timestamp); //IMPORTANT: Must be done *before* weekday name is shaved.
     408         2224 :     shaveWeekdayName(timestamp);
     409         2224 :     monthMassager(timestamp);
     410              :     QString timestamps[] = {
     411              :         timestamp,
     412         4448 :         timestamp.left(timestamp.lastIndexOf(" ")).trimmed(),
     413         4448 :         timestamp.left(timestamp.lastIndexOf(".")).trimmed(),
     414         4448 :         timestamp.left(timestamp.lastIndexOf("-")).trimmed(),
     415         4448 :         timestamp.left(timestamp.lastIndexOf("+")).trimmed(),
     416              :         
     417              :         "" // must be last
     418        26688 :     };
     419              :     
     420              :     // Date time.  Comes in many (ugh) different formats.
     421              :     const QString dateFormats[] = { 
     422              :         // Most typical RSS format
     423              :         // Example: Tue, 02 Jul 2013 01:01:24 +0000 or Sun, 13 Oct 2013 19:15:29  PST
     424              :         // But Fang shaves off weekday names (see above), because they're useless and are often screwed up.
     425              :         "dd MMM yyyy hh:mm:ss",
     426              :         
     427              :         // One-digit minutes (yes, this happens.)
     428              :         "dd MMM yyyy hh:m:ss",
     429              :         
     430              :         // Same as above, but with full months.
     431              :         "dd MMMM yyyy hh:mm:ss",
     432              :         
     433              :         // Full month, one digit minutes.
     434              :         "dd MMMM yyyy hh:m:ss",
     435              :         
     436              :         // Also same as above, but with potentially single-digit days. (Used by "The Hindu".)
     437              :         "d MMM yyyy hh:mm:ss",
     438              :         
     439              :         // RFC 3339, normally used by Atom.
     440              :         // Example: 2013-08-07T16:47:54Z
     441              :         "yyyy-MM-ddThh:mm:ssZ",
     442              :         
     443              :         // Variant of the above without the trailing Z.
     444              :         // Example: 2012-05-30T19:46:42
     445              :         "yyyy-MM-ddThh:mm:ss",
     446              :         
     447              :         // Variant of the above without seconds OR a trailing Z.
     448              :         // Example: 2012-05-30T19:46
     449              :         "yyyy-MM-ddThh:mm",
     450              :         
     451              :         // Format used by some Chinese site.
     452              :         // Example: 2014-02-27 08:26:16.995
     453              :         "yyyy-MM-dd hh:mm:ss",
     454              :         
     455              :         // "Lokmat" uses this custom format.  I provide a single-spaced version for sanity's sake.
     456              :         // Example: 25-02-2014  01:08:10
     457              :         "dd-MM-yyyy  hh:mm:ss",
     458              :         "dd-MM-yyyy hh:mm:ss",
     459              :         
     460              :         
     461              :         "" // must be last!
     462        31136 :     };
     463              :     
     464              :     // Iterate over date formats.
     465         2224 :     int i = 0;
     466         7643 :     while (!ret.isValid() && !dateFormats[i].isEmpty()) {
     467         5419 :         const QString& format = dateFormats[i];
     468              :         
     469              :         // Try each format against each possible manipulated timestamp.
     470         5419 :         int j = 0;
     471        25968 :         while (!ret.isValid() && !timestamps[j].isEmpty()) {
     472        20549 :             QString& ts = timestamps[j];
     473        20549 :             ret = QDateTime::fromString(ts, format);
     474              :             
     475        20549 :             j++;
     476              :         }
     477              :         
     478         5419 :         i++;
     479              :     }
     480              :     
     481              :     // Check if there's a time-based adjustment and/or timezone.
     482              :     // First try numeric offsets in the format of -hhmm, +hhmm, -hh:mm, or +hh:mm.
     483         2224 :     int lastPlus = timestamp.lastIndexOf("+");
     484         2224 :     int lastMinus = timestamp.lastIndexOf("-");
     485         2224 :     if (lastPlus > 3 || lastMinus > 3) {
     486              :         // We have a plus or a minus.
     487         1561 :         int signPos = lastPlus > 3 ? lastPlus : lastMinus;
     488         1561 :         QString sAdjustment = timestamp.right(timestamp.length() - signPos);
     489         1561 :         sAdjustment = sAdjustment.trimmed();
     490              : 
     491              :         // Check for an hour/minute adjustment, in the format of -hhmm or +hhmm
     492              :         // OR in the format of -hh:mm or +hh:mm
     493         2891 :         if ((sAdjustment.length() == 5 || sAdjustment.length() == 6) &&
     494         2891 :                 (sAdjustment.startsWith("+") || sAdjustment.startsWith("-"))) {
     495         1330 :             int adjustment = 0; // Adjustment in minutes.
     496         1330 :             bool containsCol = sAdjustment.contains(':');
     497         1330 :             bool isNum = false;
     498         1330 :             int hours = 0;
     499         1330 :             int minutes = 0;
     500              : 
     501         1330 :             QString sNumber = sAdjustment.right(containsCol ? 5 : 4); // Skip + or -
     502              :             // YES!  We've got an adjustment!
     503         1330 :             hours = sNumber.left(2).toInt(&isNum);
     504         1330 :             if (isNum) {
     505         1330 :                 minutes = sNumber.right(2).toInt(&isNum);
     506              :             }
     507              : 
     508              :             // Looks like we're good!
     509         1330 :             if (isNum) {
     510              :                 // Condense down to minutes.
     511         1330 :                 minutes += (hours * 60);
     512         1330 :                 adjustment = sAdjustment.startsWith("-") ? minutes : -minutes;
     513              : 
     514              :                 // Add in our adjustment if we need it.
     515         1330 :                 ret = ret.addSecs(adjustment * 60 /* seconds */);
     516              :             }
     517         1330 :         }
     518         1561 :     }
     519              : 
     520              :     // Three-letter timezone abbreviations (UTC offset in minutes).
     521              :     static const QMap<QString, int> tzOffsets = {
     522            0 :         {"GMT",    0}, {"UTC",    0},
     523            0 :         {"EST", -300}, {"EDT", -240},
     524            0 :         {"CST", -360}, {"CDT", -300},
     525            0 :         {"MST", -420}, {"MDT", -360},
     526            0 :         {"PST", -480}, {"PDT", -420}
     527         2235 :     };
     528              : 
     529              :     // Check if the timestamp ends with a known abbreviation.
     530         2224 :     QString lastWord = timestamp.section(' ', -1).trimmed().toUpper();
     531         2224 :     if (tzOffsets.contains(lastWord)) {
     532          663 :         int offsetMinutes = tzOffsets.value(lastWord);
     533          663 :         ret = ret.addSecs(-offsetMinutes * 60);
     534              :     }
     535              :     
     536              :     // All times are (supposedly) in UTC.
     537         2224 :     ret.setTimeZone(QTimeZone::UTC);
     538              : 
     539         4448 :     return ret;
     540        48929 : }
     541              : 
     542              : 
     543         2224 : void ParserXMLWorker::yearFix(QString& timestamp)
     544              : {
     545              :     // If the timestamp is something like this:
     546              :     // Tue, 02 Jul 13 [etc]
     547              :     // We want to make it something like this:
     548              :     // Tue, 02 Jul 2013 [etc]
     549         2224 :     if (timestamp.length() == 0 || !timestamp[0].isLetter()) {
     550          335 :         return; // Early exit.
     551              :     }
     552              :     
     553         1889 :     bool seenWeekday = false;
     554         1889 :     bool seenDay = false;
     555         1889 :     bool seenMonth = false;
     556         1889 :     bool seenYear = false;
     557         1889 :     bool hitSpace = true; // This controls whether or not we examine the character.
     558         1889 :     int charsInYear = 0;
     559        32113 :     for (int i = 0; i < timestamp.length(); i++) {
     560        32113 :         if (hitSpace && (timestamp[i].isLetter() || timestamp[i] == ',')) {
     561         3778 :             hitSpace = false; // reset
     562              :             
     563         3778 :             if (!seenWeekday) {
     564         1889 :                 seenWeekday = true;
     565         1889 :             } else if (!seenMonth) {
     566         1889 :                 seenMonth = true;
     567              :             }
     568        28335 :         } else if (hitSpace && (timestamp[i].isDigit())) {
     569         3778 :             hitSpace = false; // reset
     570              :             
     571         3778 :             if (!seenDay) {
     572         1889 :                 seenDay = true;
     573         1889 :             } else if (!seenYear) {
     574         1889 :                 seenYear = true;
     575              :             }
     576        24557 :         } else if (timestamp[i].isSpace()) {
     577         7556 :             if (seenYear) {
     578              :                 // Here's where we find out if we can leave yet.
     579         1889 :                 if (charsInYear != 2) {
     580         1873 :                     break; // Early exit!
     581              :                 } else {
     582              :                     // Sigh... okay, now we have to back up and insert a "20".
     583              :                     // Currently we're here: [Tue, 02 Jul 13 ]
     584           16 :                     timestamp = timestamp.insert(i - 2, "20");
     585           16 :                     return; // YAY! WE DID IT!
     586              :                 }
     587              :             } else {
     588         5667 :                 hitSpace = true;
     589              :             }
     590              :         }
     591              :         
     592        30224 :         if (seenYear) {
     593         7524 :             ++charsInYear;
     594              :         }
     595              :     }
     596              : }
     597              : 
     598              : 
     599         2224 : void ParserXMLWorker::shaveWeekdayName(QString& timestamp)
     600              : {
     601              :     // NOTE:
     602              :     // By the time we've reached this method, the timestamp has
     603              :     // already been trimmed, and we've made sure the year has four digits.
     604              :     
     605         2224 :     int comma = timestamp.indexOf(',');
     606         2224 :     if (comma < 0) {
     607          335 :         return; // Early exit.
     608              :     }
     609              :     
     610              :     // Remove up to and including the comma itself.
     611         1889 :     timestamp = timestamp.remove(0, comma + 1).trimmed();
     612              : }
     613              : 
     614         2224 : void ParserXMLWorker::monthMassager(QString& timestamp)
     615              : {
     616              :     // Add new ones as they're encountered.
     617         2224 :     timestamp = timestamp.replace("Sept ", "Sep ");
     618         2224 : }
     619              : 
     620              : 
     621        18453 : QString ParserXMLWorker::getTagStackAt(qint32 n)
     622              : {
     623              :     // n is from 0..size - 1
     624        18453 :     if (tagStack.isEmpty() || (tagStack.size() - 1) < n)
     625            0 :         return "";
     626              :     
     627        18453 :     return tagStack.at(tagStack.size() - 1 - n);
     628              : }
        

Generated by: LCOV version 2.0-1