LCOV - code coverage report
Current view: top level - src/utilities - PageMetadataExtractor.cpp (source / functions) Coverage Total Hit
Test: coverage.info.cleaned Lines: 100.0 % 55 55
Test Date: 2026-03-23 10:19:47 Functions: 100.0 % 1 1

            Line data    Source code
       1              : #include "PageMetadataExtractor.h"
       2              : 
       3              : #include <QXmlStreamReader>
       4              : 
       5            8 : PageMetadata PageMetadataExtractor::extract(const QString& xhtml)  // Collects title, description, image URL, author and lang from the markup in `xhtml`.
       6              : {
       7            8 :     PageMetadata meta;
       8              :     // Candidate values gathered during the scan; which one ends up in `meta` is decided after the loop.
       9            8 :     QString ogTitle;
      10            8 :     QString ogDescription;
      11            8 :     QUrl ogImage;
      12            8 :     QString ogAuthor;
      13            8 :     QString htmlTitle;
      14            8 :     QString metaDescription;
      15            8 :     QString metaAuthor;
      16            8 :     QString htmlLang;
      17              :     // NOTE(review): the input is fed to an XML stream reader, so it presumably must be well-formed XHTML -- confirm with callers.
      18            8 :     QXmlStreamReader xml(xhtml);
      19              :     // Read token by token until the reader reports atEnd() or a <body> start element is seen.
      20          401 :     while (!xml.atEnd()) {
      21          393 :         xml.readNext();
      22              :         // Only start elements are inspected; every other token type is ignored.
      23          393 :         if (xml.isStartElement()) {
      24          127 :             QString tag = xml.name().toString().toLower();  // lower-cased so the tag checks below are case-insensitive
      25              : 
      26              :             // Extract lang from <html lang="...">; the value is stored lower-cased.
      27          127 :             if (tag == "html") {
      28            8 :                 QXmlStreamAttributes attrs = xml.attributes();
      29            8 :                 if (attrs.hasAttribute("lang")) {
      30            3 :                     htmlLang = attrs.value("lang").toString().toLower();
      31              :                 }
      32            8 :             }
      33              : 
      34              :             // Stop at <body> -- we only care about <head>.
      35          127 :             if (tag == "body") {
      36            8 :                 break;
      37              :             }
      38              : 
      39          119 :             if (tag == "title") {
      40            7 :                 htmlTitle = xml.readElementText().trimmed();  // element text with surrounding whitespace removed
      41          112 :             } else if (tag == "meta") {
      42           33 :                 QXmlStreamAttributes attrs = xml.attributes();
      43              : 
      44              :                 // OpenGraph tags: <meta property="og:..." content="..."> (plus "article:author").
      45           46 :                 if (attrs.hasAttribute("property") && attrs.hasAttribute("content")) {
      46           13 :                     QString property = attrs.value("property").toString().toLower();
      47           13 :                     QString content = attrs.value("content").toString();
      48              :                     // The property name is matched lower-cased; content is stored unchanged. A later matching tag overwrites an earlier one.
      49           13 :                     if (property == "og:title") {
      50            2 :                         ogTitle = content;
      51           11 :                     } else if (property == "og:description") {
      52            2 :                         ogDescription = content;
      53            9 :                     } else if (property == "og:image") {
      54            2 :                         ogImage = QUrl(content);
      55            7 :                     } else if (property == "article:author") {
      56            1 :                         ogAuthor = content;
      57              :                     }
      58           13 :                 }
      59              :                 // Deliberately not an else-branch: a <meta> carrying both "property" and "name" is examined by both checks.
      60              :                 // Standard meta tags: <meta name="..." content="...">
      61           51 :                 if (attrs.hasAttribute("name") && attrs.hasAttribute("content")) {
      62           18 :                     QString name = attrs.value("name").toString().toLower();
      63           18 :                     QString content = attrs.value("content").toString();
      64              :                     // Only "description" and "author" are recognized; any other name is ignored.
      65           18 :                     if (name == "description") {
      66            3 :                         metaDescription = content;
      67           15 :                     } else if (name == "author") {
      68            1 :                         metaAuthor = content;
      69              :                     }
      70           18 :                 }
      71           33 :             }
      72          127 :         }
      73              :     }
      74              : 
      75              :     // Apply priority: OG > standard HTML. A non-empty OG value wins; otherwise the standard value (possibly empty) is used.
      76            8 :     meta.title = !ogTitle.isEmpty() ? ogTitle : htmlTitle;
      77            8 :     meta.description = !ogDescription.isEmpty() ? ogDescription : metaDescription;
      78            8 :     meta.imageUrl = ogImage;  // og:image only; there is no non-OG fallback
      79            8 :     meta.author = !ogAuthor.isEmpty() ? ogAuthor : metaAuthor;
      80            8 :     meta.lang = htmlLang;  // taken from <html lang>, already lower-cased
      81              :     // Anything that was never found is returned as the corresponding local's default-constructed value.
      82           16 :     return meta;
      83            8 : }
        

Generated by: LCOV version 2.0-1