From 453e36317ff6a5e212ad73b278ce28c2963355d9 Mon Sep 17 00:00:00 2001
From: dogeystamp
Date: Thu, 27 Oct 2022 20:36:51 -0400
Subject: [PATCH] xml.c: merge into minrss.c

---
 Makefile |   2 +-
 minrss.c | 258 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 xml.c    | 258 -------------------------------------------------------
 xml.h    |  31 -------
 4 files changed, 258 insertions(+), 291 deletions(-)
 delete mode 100644 xml.c
 delete mode 100644 xml.h

diff --git a/Makefile b/Makefile
index dfdf148..e33c812 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ PREFIX = ~/.local
 VERSION = 0.1
 
 # CC = cc
-SRC = minrss.c util.c net.c xml.c
+SRC = minrss.c util.c net.c
 OBJ = $(SRC:.c=.o)
 PKG_CONFIG = pkg-config
 CURL_CONFIG = curl-config
diff --git a/minrss.c b/minrss.c
index c8197a2..141af1a 100644
--- a/minrss.c
+++ b/minrss.c
@@ -13,11 +13,267 @@ You should have received a copy of the GNU General Public License along with thi
 #include
 #include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
 
 #include "config.h"
 #include "util.h"
 #include "net.h"
-#include "xml.h"
+
+#define TAGIS(X, Y) (!xmlStrcmp((X)->name, (const xmlChar *) (Y)))
+
+enum feedFormat {
+	NONE,
+	RSS,
+	ATOM,
+};
+
+typedef struct itemStruct itemStruct;
+struct itemStruct {
+	char *title;
+	char *link;
+	char *description;
+	itemStruct *next;
+};
+
+void
+freeItem(itemStruct *item)
+{
+	// Free one article: its title, link and description strings, then the struct itself (item->next is not followed).
+
+	char **mems[] = {
+		&item->title,
+		&item->link,
+		&item->description,
+	};
+
+	for (unsigned long int i = 0; i < LEN(mems); i++) {
+		if (*mems[i])
+			free(*mems[i]);
+	}
+
+	free(item);
+}
+
+static int
+parseXml(xmlDocPtr doc,
+		const char *feedName,
+		void itemAction(itemStruct *, const char *))
+{
+	// Parse one RSS or Atom document into a linked list of items, create the feed directory, then pass the list to itemAction(). Returns 0 on success, 1 on error.
+
+	if (!feedName || !feedName[0]) {
+		logMsg(1, "Missing feed name, please set one.\n");
+		return 1;
+	}
+
+	xmlNodePtr rootNode;
+
+	rootNode = xmlDocGetRootElement(doc);
+
+	if (!rootNode) {
+		logMsg(1, "Empty document for feed.\n");
+		return 1;
+	}
+
+	enum feedFormat format = NONE;
+
+	if (TAGIS(rootNode, "rss")) {
+		format = RSS;
+	} else if (TAGIS(rootNode, "feed")) {
+		if (rootNode->ns && !xmlStrcmp(rootNode->ns->href, (const xmlChar *) "http://www.w3.org/2005/Atom")) // ns is NULL when <feed> declares no namespace
+			format = ATOM;
+	}
+
+
+	if (format == NONE) {
+		logMsg(1, "XML document is not an RSS or Atom feed.\n");
+		return 1;
+	}
+
+	// Pointer to the first child of the root XML node
+	xmlNodePtr cur = rootNode->children;
+
+	switch (format) {
+		case RSS:
+			// Get channel XML tag
+			while(cur && !TAGIS(cur, "channel"))
+				cur = cur->next;
+
+			if (!cur || !TAGIS(cur, "channel")) {
+				logMsg(1, "Invalid RSS syntax.\n");
+				return 1;
+			}
+
+			// Set cur to child of channel
+			cur = cur->children;
+			break;
+
+		case ATOM:
+			// Set cur to child of feed
+			cur = rootNode->children;
+			break;
+
+		default:
+			logMsg(1, "Missing starting tag for format\n");
+			return 1;
+	}
+
+	// Head of the item list built so far; each new item is prepended, so the list ends up in reverse document order
+	itemStruct *prev = NULL;
+
+	// Loop over articles (skipping non-article tags)
+	while (cur) {
+
+		short isArticle = 0;
+
+		switch (format) {
+			case RSS:
+				isArticle = TAGIS(cur, "item");
+				break;
+			case ATOM:
+				isArticle = TAGIS(cur, "entry");
+				break;
+			default:
+				logMsg(1, "Missing article tag name for format\n");
+				return 1;
+		}
+
+		if (isArticle) {
+			itemStruct *item = ecalloc(1, sizeof(itemStruct));
+
+			// The selected set of attribute keys
+			char **attKeys;
+
+			// Struct variables to map attributes to
+			char **atts[] = {
+				&item->title,
+				&item->link,
+				&item->description,
+			};
+
+			// Attribute keys for each format
+
+			char *attKeysRss[LEN(atts)] = {
+				"title",
+				"link",
+				"description",
+			};
+
+			char *attKeysAtom[LEN(atts)] = {
+				"title",
+				// link has special treatment because its value is in href not within the tag
+				"",
+				"content",
+			};
+
+			switch (format) {
+				case RSS:
+					attKeys = attKeysRss;
+					break;
+
+				case ATOM:
+					attKeys = attKeysAtom;
+					break;
+
+				default:
+					logMsg(1, "Missing article attribute keys for format\n");
+					return 1;
+			};
+
+			// Build a linked list of item structs to pass to itemAction()
+			item->next = prev;
+			prev = item;
+
+			xmlNodePtr itemNode = cur->children;
+
+			// Value within the tag
+			char *itemKey;
+
+			while (itemNode) {
+				itemKey = (char *)xmlNodeListGetString(doc, itemNode->children, 1);
+
+				if (itemKey) {
+					for (unsigned long int i = 0; i < LEN(atts); i++) {
+						if (TAGIS(itemNode, attKeys[i])) {
+							size_t keyLen = strlen(itemKey) + 1;
+							*atts[i] = ecalloc(keyLen, sizeof(char));
+							memcpy(*atts[i], itemKey, keyLen * sizeof(char));
+
+							break;
+						}
+					}
+
+					xmlFree(itemKey);
+				}
+
+				// Exceptions
+
+				// Atom entry link tag
+				if (format == ATOM && TAGIS(itemNode, "link")) {
+					xmlChar *link = xmlGetProp(itemNode, (xmlChar *) "href");
+
+					if (!link) {
+						logMsg(1, "Missing Atom entry link\n");
+						xmlFree(link);
+						return 1;
+					}
+
+					size_t linkLen = strlen((char *) link) + 1;
+					item->link = ecalloc(linkLen, sizeof(char));
+					memcpy(item->link, (char *) link, linkLen * sizeof(char));
+
+					xmlFree(link);
+				}
+
+				itemNode = itemNode->next;
+			}
+		}
+
+		cur = cur->next;
+	}
+
+	errno = 0;
+	int stat = mkdir((const char* ) feedName, S_IRWXU);
+
+	if (stat && errno != EEXIST) { // mkdir() returns -1 on failure; an already existing directory is not an error
+		logMsg(1, "Error creating directory for feed.\n");
+		return 1;
+	}
+
+	itemAction(prev, feedName);
+
+	return 0;
+}
+
+int
+readDoc(char *content,
+		const char *feedName,
+		void itemAction(itemStruct *, const char *))
+{
+	// Parse content (a NUL-terminated XML string), run parseXml() on it, then free the document. Returns parseXml()'s status, or 1 if the XML cannot be parsed.
+
+	xmlDocPtr doc;
+
+	doc = xmlReadMemory(content, strlen(content), "noname.xml", NULL, 0);
+	if (!doc) {
+		logMsg(1, "XML parser error.\n");
+		return 1;
+	}
+
+	int stat = parseXml(doc, feedName, itemAction);
+
+	if (stat)
+		logMsg(1, "Skipped feed %s due to errors.\n", feedName);
+
+	xmlFreeDoc(doc);
+
+	return stat;
+}
 
 void
 itemAction(itemStruct *item, const char *folder)
diff --git a/xml.c b/xml.c
deleted file mode 100644
index 03efffb..0000000
--- a/xml.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
-
-This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/.
-
-© 2021 dogeystamp
-*/
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "util.h"
-#include "xml.h"
-
-#define TAGIS(X, Y) (!xmlStrcmp(X->name, (const xmlChar *) Y))
-
-void
-freeItem(itemStruct *item)
-{
-	// Free the memory used by an article.
-
-	char **mems[] = {
-		&item->title,
-		&item->link,
-		&item->description,
-	};
-
-	for (unsigned long int i = 0; i < LEN(mems); i++) {
-		if (*mems[i])
-			free(*mems[i]);
-	}
-
-	free(item);
-}
-
-static int
-parseXml(xmlDocPtr doc,
-		const char *feedName,
-		void itemAction(itemStruct *, const char *))
-{
-	// Parse the XML in a single document.
-
-	if (!feedName || !feedName[0]) {
-		logMsg(1, "Missing feed name, please set one.\n");
-		return 1;
-	}
-
-	xmlNodePtr rootNode;
-
-	rootNode = xmlDocGetRootElement(doc);
-
-	if (!rootNode) {
-		logMsg(1, "Empty document for feed.\n");
-		return 1;
-	}
-
-	enum feedFormat format = NONE;
-
-	if (TAGIS(rootNode, "rss")) {
-		format = RSS;
-	} else if (TAGIS(rootNode, "feed")) {
-		if (!xmlStrcmp(rootNode->ns->href, (const xmlChar *) "http://www.w3.org/2005/Atom"))
-			format = ATOM;
-	}
-
-
-	if (format == NONE) {
-		logMsg(1, "XML document is not an RSS or Atom feed.\n");
-		return 1;
-	}
-
-	// Pointer to the first child of the root XML node
-	xmlNodePtr cur = rootNode->children;
-
-	switch (format) {
-		case RSS:
-			// Get channel XML tag
-			while(cur && !TAGIS(cur, "channel"))
-				cur = cur->next;
-
-			if (!cur || !TAGIS(cur, "channel")) {
-				logMsg(1, "Invalid RSS syntax.\n");
-				return 1;
-			}
-
-			// Set cur to child of channel
-			cur = cur->children;
-			break;
-
-		case ATOM:
-			// Set cur to child of feed
-			cur = rootNode->children;
-			break;
-
-		default:
-			logMsg(1, "Missing starting tag for format\n");
-			return 1;
-	}
-
-	// Previous item (to build a linked list later)
-	itemStruct *prev = NULL;
-
-	// Loop over articles (skipping non-article tags)
-	while (cur) {
-
-		short isArticle = 0;
-
-		switch (format) {
-			case RSS:
-				isArticle = TAGIS(cur, "item");
-				break;
-			case ATOM:
-				isArticle = TAGIS(cur, "entry");
-				break;
-			default:
-				logMsg(1, "Missing article tag name for format\n");
-				return 1;
-		}
-
-		if (isArticle) {
-			itemStruct *item = ecalloc(1, sizeof(itemStruct));
-
-			// The selected set of attribute keys
-			char **attKeys;
-
-			// Struct variables to map attributes to
-			char **atts[] = {
-				&item->title,
-				&item->link,
-				&item->description,
-			};
-
-			// Attribute keys for each format
-
-			char *attKeysRss[LEN(atts)] = {
-				"title",
-				"link",
-				"description",
-			};
-
-			char *attKeysAtom[LEN(atts)] = {
-				"title",
-				// link has special treatment because its value is in href not within the tag
-				"",
-				"content",
-			};
-
-			switch (format) {
-				case RSS:
-					attKeys = attKeysRss;
-					break;
-
-				case ATOM:
-					attKeys = attKeysAtom;
-					break;
-
-				default:
-					logMsg(1, "Missing article attribute keys for format\n");
-					return 1;
-			};
-
-			// Build a linked list of item structs to pass to itemAction()
-			item->next = prev;
-			prev = item;
-
-			xmlNodePtr itemNode = cur->children;
-
-			// Value within the tag
-			char *itemKey;
-
-			while (itemNode) {
-				itemKey = (char *)xmlNodeListGetString(doc, itemNode->children, 1);
-
-				if (itemKey) {
-					for (unsigned long int i = 0; i < LEN(atts); i++) {
-						if (TAGIS(itemNode, attKeys[i])) {
-							size_t keyLen = strlen(itemKey) + 1;
-							*atts[i] = ecalloc(keyLen, sizeof(char));
-							memcpy(*atts[i], itemKey, keyLen * sizeof(char));
-
-							break;
-						}
-					}
-
-					xmlFree(itemKey);
-				}
-
-				// Exceptions
-
-				// Atom entry link tag
-				if (format == ATOM && TAGIS(itemNode, "link")) {
-					xmlChar *link = xmlGetProp(itemNode, (xmlChar *) "href");
-
-					if (!link) {
-						logMsg(1, "Missing Atom entry link\n");
-						xmlFree(link);
-						return 1;
-					}
-
-					size_t linkLen = strlen((char *) link) + 1;
-					item->link = ecalloc(linkLen, sizeof(char));
-					memcpy(item->link, (char *) link, linkLen * sizeof(char));
-
-					xmlFree(link);
-				}
-
-				itemNode = itemNode->next;
-			}
-		}
-
-		cur = cur->next;
-	}
-
-	errno = 0;
-	int stat = mkdir((const char* ) feedName, S_IRWXU);
-
-	if (!stat && errno && errno != EEXIST) {
-		logMsg(1, "Error creating directory for feed.\n");
-		return 1;
-	}
-
-	itemAction(prev, feedName);
-
-	return 0;
-}
-
-int
-readDoc(char *content,
-		const char *feedName,
-		void itemAction(itemStruct *, const char *))
-{
-	// Initialize the XML document, read it, then free it
-
-	xmlDocPtr doc;
-
-	doc = xmlReadMemory(content, strlen(content), "noname.xml", NULL, 0);
-	if (!doc) {
-		logMsg(1, "XML parser error.\n");
-		return 1;
-	}
-
-	int stat = parseXml(doc, feedName, itemAction);
-
-	if (stat)
-		logMsg(1, "Skipped feed %s due to errors.\n", feedName);
-
-	xmlFreeDoc(doc);
-
-	return stat;
-}
diff --git a/xml.h b/xml.h
deleted file mode 100644
index 1ce64b5..0000000
--- a/xml.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
-
-This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/.
-
-© 2021 dogeystamp
-*/
-
-typedef struct itemStruct itemStruct;
-struct itemStruct {
-	char *title;
-	char *link;
-	char *description;
-	itemStruct *next;
-};
-
-enum feedFormat {
-	NONE,
-	RSS,
-	ATOM,
-};
-
-void freeItem(itemStruct *item);
-
-int readDoc(
-		char *content,
-		const char *feedName,
-		void itemAction(itemStruct *, char const *chanTitle));