diff --git a/config.def.h b/config.def.h index 254e194..5643df7 100644 --- a/config.def.h +++ b/config.def.h @@ -1,3 +1,14 @@ +/* + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/. + +© 2021 dogeystamp +*/ + typedef struct { const char *url; const char *feedName; diff --git a/minrss.c b/minrss.c index 497be5e..c60ee9f 100644 --- a/minrss.c +++ b/minrss.c @@ -1,3 +1,14 @@ +/* + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/. + +© 2021 dogeystamp +*/ + #include #include #include diff --git a/net.c b/net.c index b6dcf3b..3d6ade5 100644 --- a/net.c +++ b/net.c @@ -1,3 +1,14 @@ +/* + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/. + +© 2021 dogeystamp +*/ + #include #include #include diff --git a/net.h b/net.h index 38ef515..00fb6bd 100644 --- a/net.h +++ b/net.h @@ -1,3 +1,14 @@ +/* + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/. + +© 2021 dogeystamp +*/ + #include typedef struct { diff --git a/util.c b/util.c index 2cca268..bb8e9ac 100644 --- a/util.c +++ b/util.c @@ -1,3 +1,14 @@ +/* + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/. 
+ +© 2021 dogeystamp +*/ + #include #include #include diff --git a/util.h b/util.h index 9739e82..b771cb6 100644 --- a/util.h +++ b/util.h @@ -1,3 +1,14 @@ +/* + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/. + +© 2021 dogeystamp +*/ + #define LEN(X) (sizeof(X) / sizeof(X[0])) void logMsg(int argc, char *msg, ...); diff --git a/xml.c b/xml.c index 49b8284..8de2347 100644 --- a/xml.c +++ b/xml.c @@ -1,3 +1,14 @@ +/* + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/. + +© 2021 dogeystamp +*/ + #include #include #include @@ -47,76 +58,163 @@ parseXml(xmlDocPtr doc, rootNode = xmlDocGetRootElement(doc); if (!rootNode) { - logMsg(1, "Empty document for feed. Skipping...\n"); + logMsg(1, "Empty document for feed.\n"); return 1; } - if (!TAGIS(rootNode, "rss")) { - logMsg(1, "XML document is not an RSS feed. 
Skipping...\n"); + enum feedFormat format = none; + + if (TAGIS(rootNode, "rss")) { + format = rss; + } else if (TAGIS(rootNode, "feed")) { /* ns is NULL for an un-namespaced <feed>; check it before reading href */ + if (rootNode->ns && !xmlStrcmp(rootNode->ns->href, (const xmlChar *) "http://www.w3.org/2005/Atom")) + format = atom; + } + + + if (format == none) { + logMsg(1, "XML document is not an RSS or Atom feed.\n"); return 1; } - xmlChar *key; + // Pointer to the first child of the root XML node + xmlNodePtr cur = rootNode->children; - // Get channel XML tag - xmlNodePtr channel = rootNode->children; + switch (format) { + case rss: + // Get channel XML tag + while(cur && !TAGIS(cur, "channel")) + cur = cur->next; - while(channel && !TAGIS(channel, "channel")) - channel = channel->next; + if (!cur || !TAGIS(cur, "channel")) { + logMsg(1, "Invalid RSS syntax.\n"); + return 1; + } - if (!channel || !TAGIS(channel, "channel")) { - logMsg(1, "Invalid RSS syntax. Skipping...\n"); + // Set cur to child of channel + cur = cur->children; + break; + + case atom: + // Set cur to child of feed + cur = rootNode->children; + break; + + default: + logMsg(1, "Missing starting tag for format\n"); + return 1; } - // Pointer to an article xml tag - xmlNodePtr cur = channel->children; - + // Previous item (to build a linked list later) itemStruct *prev = NULL; + // Loop over articles (skipping non-article tags) while (cur) { - key = xmlNodeListGetString(doc, cur->children, 1); + short isArticle = 0; - if (TAGIS(cur, "item")) { + switch (format) { + case rss: + isArticle = TAGIS(cur, "item"); + break; + case atom: + isArticle = TAGIS(cur, "entry"); + break; + default: + logMsg(1, "Missing article tag name for format\n"); + return 1; + } + + if (isArticle) { itemStruct *item = ecalloc(1, sizeof(itemStruct)); + // The selected set of attribute keys + char **attKeys; + + // Struct variables to map attributes to + char **atts[] = { + &item->title, + &item->link, + &item->description, + }; + + // Attribute keys for each format + + char *attKeysRss[LEN(atts)] = { + "title", 
+ "link", + "description", + }; + + char *attKeysAtom[LEN(atts)] = { + "title", + // link has special treatment because its value is in href not within the tag + "", + "content", + }; + + switch (format) { + case rss: + attKeys = attKeysRss; + break; + + case atom: + attKeys = attKeysAtom; + break; + + default: + logMsg(1, "Missing article attribute keys for format\n"); + return 1; + }; + // Build a linked list of item structs to pass to itemAction() item->next = prev; prev = item; xmlNodePtr itemNode = cur->children; - while (itemNode) { - char *itemKey = (char *)xmlNodeListGetString(doc, itemNode->children, 1); + // Value within the tag + char *itemKey; - char *attKeys[] = { - "title", - "link", - "description", - }; - char **atts[] = { - &item->title, - &item->link, - &item->description, - }; + while (itemNode) { + itemKey = (char *)xmlNodeListGetString(doc, itemNode->children, 1); if (itemKey) { - for (unsigned long int i = 0; i < LEN(attKeys); i++) { + for (unsigned long int i = 0; i < LEN(atts); i++) { if (TAGIS(itemNode, attKeys[i])) { *atts[i] = ecalloc(strlen(itemKey) + 1, sizeof(char)); strcpy(*atts[i], itemKey); + + break; } } xmlFree(itemKey); } + + // Exceptions + + // Atom entry link tag + if (format == atom && TAGIS(itemNode, "link")) { + xmlChar *link = xmlGetProp(itemNode, (xmlChar *) "href"); + + if (!link) { + logMsg(1, "Missing Atom entry link\n"); + xmlFree(link); + return 1; + } + + item->link = ecalloc(strlen((char *) link) + 1, sizeof(char)); + strcpy(item->link, (char *) link); + + xmlFree(link); + } itemNode = itemNode->next; } } - xmlFree(key); cur = cur->next; } @@ -150,6 +248,9 @@ readDoc(char *content, int stat = parseXml(doc, feedName, itemAction); + if (stat) + logMsg(1, "Skipped feed %s due to errors.\n", feedName); + xmlFreeDoc(doc); return stat; diff --git a/xml.h b/xml.h index 46f5891..c162e07 100644 --- a/xml.h +++ b/xml.h @@ -1,3 +1,14 @@ +/* + +This program is free software: you can redistribute it and/or modify it under the 
terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/. + +© 2021 dogeystamp +*/ + typedef struct itemStruct itemStruct; struct itemStruct { char *title; @@ -6,6 +17,12 @@ struct itemStruct { itemStruct *next; }; +enum feedFormat { /* feed dialect that parseXml() in xml.c selects from the document's root element */ + none, /* initial value; if still set after the root-tag checks, parseXml() logs "not an RSS or Atom feed" and returns 1 */ + rss, /* chosen when the root element matches TAGIS(rootNode, "rss") */ + atom /* chosen when the root element matches "feed" and its namespace href is http://www.w3.org/2005/Atom */ +}; + void freeItem(itemStruct *item); int readDoc(