LCOV - code coverage report
Current view: top level - src/utilities - RawFeedRewriter.h (source / functions) Coverage Total Hit
Test: coverage.info.cleaned Lines: 85.7 % 7 6
Test Date: 2026-01-27 22:31:25 Functions: 50.0 % 2 1

            Line data    Source code
       1              : #ifndef RAWFEEDIMAGESIZEREWRITER_H
       2              : #define RAWFEEDIMAGESIZEREWRITER_H
       3              : 
       4              : #include <QObject>
       5              : #include <QList>
       6              : 
       7              : #include "../parser/RawNews.h"
       8              : #include "../FangObject.h"
       9              : #include "ImageGrabber.h"
      10              : #include "WebPageGrabber.h"
      11              : 
      12              : // Represents a DOM node: tag name, integer ID, and counters for non-empty text and children.
      13              : class DOMNode {
      14              : public:
      15          158 :     DOMNode(QString tagName, int intID) :
      16          158 :         tagName(tagName),
      17          158 :         intID(intID),
      18          158 :         nonEmptyTextCount(0),
      19          158 :         numChildren(0)
      20          158 :     {}
      21              : 
      22              :     // Stack requires a default c'tor; tagName stays default-constructed, intID and both counters start at 0.
      23              :     DOMNode() :
      24              :         intID(0),
      25              :         nonEmptyTextCount(0),
      26              :         numChildren(0)
      27              :     {}
      28              : 
      29              :     QString tagName;
      30              :     int intID;
      31              :     int nonEmptyTextCount;
      32              :     int numChildren;
      33              : };
      34              : 
      35              : /**
      36              :  * @brief Takes a "raw" HTML feed and processes it in the following ways:
      37              :  *          - Tidy'd into XHTML fragments
      38              :  *          - Image sizes are baked in
      39              :  *          - Javascript is stripped
      40              :  *          - Common social media buttons removed
      41              :  *          - Tracking pixels?  Nope.
      42              :  */
      43              : class RawFeedRewriter : public FangObject
      44              : {
      45              :     Q_OBJECT
      46              : public:
      47              :     explicit RawFeedRewriter(QObject *parent = nullptr);
      48              :     
      49              :     
      50              : signals:
      51              :     /**
      52              :      * @brief We're done!  The feed you passed in has been modified.
      53              :      */
      54              :     void finished();
      55              :     
      56              : public slots:
      57              :     
      58              :     void rewrite(QList<RawNews*>* newsList);
      59              :     
      60              :     /**
      61              :      * @return The newsList pointer (the current news list).
      62              :      */
      63            0 :     inline QList<RawNews*>* getNewsList() { return newsList; }
      64              : 
      65              : protected:
      66              :     // Returns true if the text in a node is just whitespace.
      67              :     bool isHTMLEmpty(QString html);
      68              : 
      69              :     // Check whether we're looking at a share button URL.
      70              :     bool isShareURL(const QString& url);
      71              : 
      72              :     // Turns an int into an ID.
      73              :     QString intToID(int id);
      74              : 
      75              :     // First pass rewriter.
      76              :     QString rewriteFirstPass(const QString& document, QSet<QUrl>& imageURLs);
      77              : 
      78              :     // Calls rewriteSecondPass() on all news HTML.
      79              :     void rewriteAllSecondPass();
      80              : 
      81              :     // Second pass rewriter: takes care of the images (if needed) and deletes empty elements.
      82              :     QString rewriteSecondPass(QString& docString);
      83              : 
      84              :     // Post-process our news list.
      85              :     void postProcess();
      86              : 
      87              :     // Remove headers, footers, and other garbage.
      88              :     void postProcessDocString(QString& docString);
      89              : 
      90              :     // Resizes image dimensions.
      91              :     void imageResize(int width, int height, int* newWidth, int* newHeight);
      92              : 
      93              :     // Removes excessive newlines.
      94              :     void removeNewlinesBothSides(QString& docString);
      95              : 
      96              :     // Some news is text instead of HTML (Hearst's papers, for example.)
      97              :     QString rewriteTextOnlyNews(QString input);
      98              : 
      99              : protected slots:
     100              :     // We've grabbed our images.
     101              :     void onImageGrabberFinished();
     102              : 
     103              :     
     104              : private:
     105              :     // The current news list.
     106              :     QList<RawNews*>* newsList;
     107              : 
     108              :     // Web page grabber.
     109              :     WebPageGrabber webPageGrabber;
     110              : 
     111              :     // Image grabber!  GRAB GRAB GRAB
     112              :     ImageGrabber imageGrabber;
     113              : 
     114              :     // Setup.
     115              :     QSet<QString> tagsToRemove;
     116              :     QSet<QString> classesToRemove;
     117              :     QList<QString> shareButtonURLs;
     118              :     QSet<QString> containerTags;
     119              :     
     120              :     // Element IDs.
     121              :     QSet<QString> idsToDelete;
     122              :     int intID;
     123              : };
     124              : 
     125              : #endif // RAWFEEDIMAGESIZEREWRITER_H
        

Generated by: LCOV version 2.0-1