LCOV - code coverage report
Current view: top level - lib/FangFeedParser - FeedDateParser.cpp (source / functions) Coverage Total Hit
Test: coverage.info.cleaned Lines: 94.7 % 95 90
Test Date: 2026-04-19 00:35:54 Functions: 100.0 % 4 4

            Line data    Source code
       1              : #include "FeedDateParser.h"
       2              : #include <QMap>
       3              : #include <QtCore/qtimezone.h>
       4              : 
       5         2235 : QDateTime FeedDateParser::dateFromFeedString(const QString& _timestamp)
       6              : {
       7         2235 :     QDateTime ret; // Defaults to invalid timestamp.
       8              : 
       9              :     // Come up with a few versions of the time stamp.
      10         2235 :     QString timestamp = _timestamp.trimmed();
      11         2235 :     yearFix(timestamp); // IMPORTANT: Must be done *before* weekday name is shaved.
      12         2235 :     shaveWeekdayName(timestamp);
      13         2235 :     monthMassager(timestamp);
      14              :     QString timestamps[] = {
      15              :         timestamp,
      16         4470 :         timestamp.left(timestamp.lastIndexOf(" ")).trimmed(),
      17         4470 :         timestamp.left(timestamp.lastIndexOf(".")).trimmed(),
      18         4470 :         timestamp.left(timestamp.lastIndexOf("-")).trimmed(),
      19         4470 :         timestamp.left(timestamp.lastIndexOf("+")).trimmed(),
      20              : 
      21              :         "" // must be last
      22        26820 :     };
      23              : 
      24              :     // Date time.  Comes in many (ugh) different formats.
      25              :     const QString dateFormats[] = {
      26              :         // Most typical RSS format
      27              :         // Example: Tue, 02 Jul 2013 01:01:24 +0000 or Sun, 13 Oct 2013 19:15:29  PST
      28              :         // But Fang shaves off weekday names (see above), because they're useless and are often screwed up.
      29              :         "dd MMM yyyy hh:mm:ss",
      30              : 
      31              :         // One-digit minutes (yes, this happens.)
      32              :         "dd MMM yyyy hh:m:ss",
      33              : 
      34              :         // Same as above, but with full months.
      35              :         "dd MMMM yyyy hh:mm:ss",
      36              : 
      37              :         // Full month, one digit minutes.
      38              :         "dd MMMM yyyy hh:m:ss",
      39              : 
      40              :         // Also same as above, but with potentially single-digit days. (Used by "The Hindu".)
      41              :         "d MMM yyyy hh:mm:ss",
      42              : 
      43              :         // RFC 3339, normally used by Atom.
      44              :         // Example: 2013-08-07T16:47:54Z
      45              :         "yyyy-MM-ddThh:mm:ssZ",
      46              : 
      47              :         // Variant of the above without the trailing Z.
      48              :         // Example: 2012-05-30T19:46:42
      49              :         "yyyy-MM-ddThh:mm:ss",
      50              : 
      51              :         // Variant of the above without seconds OR a trailing Z.
      52              :         // Example: 2012-05-30T19:46
      53              :         "yyyy-MM-ddThh:mm",
      54              : 
      55              :         // Format used by some Chinese site.
      56              :         // Example: 2014-02-27 08:26:16.995
      57              :         "yyyy-MM-dd hh:mm:ss",
      58              : 
      59              :         // "Lokmat" uses this custom format.  I provide a single-spaced version for sanity's sake.
      60              :         // Example: 25-02-2014  01:08:10
      61              :         "dd-MM-yyyy  hh:mm:ss",
      62              :         "dd-MM-yyyy hh:mm:ss",
      63              : 
      64              : 
      65              :         "" // must be last!
      66        31290 :     };
      67              : 
      68              :     // Iterate over date formats.
      69         2235 :     int i = 0;
      70         7680 :     while (!ret.isValid() && !dateFormats[i].isEmpty()) {
      71         5445 :         const QString& format = dateFormats[i];
      72              : 
      73              :         // Try each format against each possible manipulated timestamp.
      74         5445 :         int j = 0;
      75        26038 :         while (!ret.isValid() && !timestamps[j].isEmpty()) {
      76        20593 :             QString& ts = timestamps[j];
      77        20593 :             ret = QDateTime::fromString(ts, format);
      78              : 
      79        20593 :             j++;
      80              :         }
      81              : 
      82         5445 :         i++;
      83              :     }
      84              : 
      85              :     // Check if there's a time-based adjustment and/or timezone.
      86              :     // First try numeric offsets in the format of -hhmm, +hhmm, -hh:mm, or +hh:mm.
      87         2235 :     int lastPlus = timestamp.lastIndexOf("+");
      88         2235 :     int lastMinus = timestamp.lastIndexOf("-");
      89         2235 :     if (lastPlus > 3 || lastMinus > 3) {
      90              :         // We have a plus or a minus.
      91         1568 :         int signPos = lastPlus > 3 ? lastPlus : lastMinus;
      92         1568 :         QString sAdjustment = timestamp.right(timestamp.length() - signPos);
      93         1568 :         sAdjustment = sAdjustment.trimmed();
      94              : 
      95              :         // Check for an hour/minute adjustment, in the format of -hhmm or +hhmm
      96              :         // OR in the format of -hh:mm or +hh:mm
      97         2904 :         if ((sAdjustment.length() == 5 || sAdjustment.length() == 6) &&
      98         2904 :                 (sAdjustment.startsWith("+") || sAdjustment.startsWith("-"))) {
      99         1336 :             bool containsCol = sAdjustment.contains(':');
     100         1336 :             bool isNum = false;
     101         1336 :             int hours = 0;
     102         1336 :             int minutes = 0;
     103              : 
     104         1336 :             QString sNumber = sAdjustment.right(containsCol ? 5 : 4); // Skip + or -
     105         1336 :             hours = sNumber.left(2).toInt(&isNum);
     106         1336 :             if (isNum) {
     107         1336 :                 minutes = sNumber.right(2).toInt(&isNum);
     108              :             }
     109              : 
     110         1336 :             if (isNum) {
     111              :                 // Condense down to minutes.
     112         1336 :                 minutes += (hours * 60);
     113         1336 :                 int adjustment = sAdjustment.startsWith("-") ? minutes : -minutes;
     114              : 
     115              :                 // Add in our adjustment if we need it.
     116         1336 :                 ret = ret.addSecs(adjustment * 60 /* seconds */);
     117              :             }
     118         1336 :         }
     119         1568 :     }
     120              : 
     121              :     // Three-letter timezone abbreviations (UTC offset in minutes).
     122              :     static const QMap<QString, int> tzOffsets = {
     123            0 :         {"GMT",    0}, {"UTC",    0},
     124            0 :         {"EST", -300}, {"EDT", -240},
     125            0 :         {"CST", -360}, {"CDT", -300},
     126            0 :         {"MST", -420}, {"MDT", -360},
     127            0 :         {"PST", -480}, {"PDT", -420}
     128         2246 :     };
     129              : 
     130              :     // Check if the timestamp ends with a known abbreviation.
     131         2235 :     QString lastWord = timestamp.section(' ', -1).trimmed().toUpper();
     132         2235 :     if (tzOffsets.contains(lastWord)) {
     133          666 :         int offsetMinutes = tzOffsets.value(lastWord);
     134          666 :         ret = ret.addSecs(-offsetMinutes * 60);
     135              :     }
     136              : 
     137              :     // All times are (supposedly) in UTC.
     138         2235 :     ret.setTimeZone(QTimeZone::UTC);
     139              : 
     140         4470 :     return ret;
     141        49171 : }
     142              : 
     143              : 
     144         2235 : void FeedDateParser::yearFix(QString& timestamp)
     145              : {
     146              :     // If the timestamp is something like this:
     147              :     // Tue, 02 Jul 13 [etc]
     148              :     // We want to make it something like this:
     149              :     // Tue, 02 Jul 2013 [etc]
     150         2235 :     if (timestamp.length() == 0 || !timestamp[0].isLetter()) {
     151          337 :         return; // Early exit.
     152              :     }
     153              : 
     154         1898 :     bool seenWeekday = false;
     155         1898 :     bool seenDay = false;
     156         1898 :     bool seenMonth = false;
     157         1898 :     bool seenYear = false;
     158         1898 :     bool hitSpace = true; // This controls whether or not we examine the character.
     159         1898 :     int charsInYear = 0;
     160        32266 :     for (int i = 0; i < timestamp.length(); i++) {
     161        32266 :         if (hitSpace && (timestamp[i].isLetter() || timestamp[i] == ',')) {
     162         3796 :             hitSpace = false; // reset
     163              : 
     164         3796 :             if (!seenWeekday) {
     165         1898 :                 seenWeekday = true;
     166         1898 :             } else if (!seenMonth) {
     167         1898 :                 seenMonth = true;
     168              :             }
     169        28470 :         } else if (hitSpace && (timestamp[i].isDigit())) {
     170         3796 :             hitSpace = false; // reset
     171              : 
     172         3796 :             if (!seenDay) {
     173         1898 :                 seenDay = true;
     174         1898 :             } else if (!seenYear) {
     175         1898 :                 seenYear = true;
     176              :             }
     177        24674 :         } else if (timestamp[i].isSpace()) {
     178         7592 :             if (seenYear) {
     179              :                 // Here's where we find out if we can leave yet.
     180         1898 :                 if (charsInYear != 2) {
     181         1882 :                     break; // Early exit!
     182              :                 } else {
     183              :                     // Sigh... okay, now we have to back up and insert a "20".
     184              :                     // Currently we're here: [Tue, 02 Jul 13 ]
     185           16 :                     timestamp = timestamp.insert(i - 2, "20");
     186           16 :                     return; // YAY! WE DID IT!
     187              :                 }
     188              :             } else {
     189         5694 :                 hitSpace = true;
     190              :             }
     191              :         }
     192              : 
     193        30368 :         if (seenYear) {
     194         7560 :             ++charsInYear;
     195              :         }
     196              :     }
     197              : }
     198              : 
     199              : 
     200         2235 : void FeedDateParser::shaveWeekdayName(QString& timestamp)
     201              : {
     202              :     // NOTE:
     203              :     // By the time we've reached this method, the timestamp has
     204              :     // already been trimmed, and we've made sure the year has four digits.
     205              : 
     206         2235 :     int comma = timestamp.indexOf(',');
     207         2235 :     if (comma < 0) {
     208          337 :         return; // Early exit.
     209              :     }
     210              : 
     211              :     // Remove up to and including the comma itself.
     212         1898 :     timestamp = timestamp.remove(0, comma + 1).trimmed();
     213              : }
     214              : 
     215         2235 : void FeedDateParser::monthMassager(QString& timestamp)
     216              : {
     217              :     // Add new ones as they're encountered.
     218         2235 :     timestamp = timestamp.replace("Sept ", "Sep ");
     219         2235 : }
        

Generated by: LCOV version 2.0-1