minrss/xml.c

157 lines
2.9 KiB
C
Raw Normal View History

2021-08-02 11:41:15 -04:00
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlreader.h>
#include "util.h"
#include "xml.h"
// True when the name of XML node X equals the C string Y.
// Both arguments are parenthesized so arbitrary expressions may be passed.
#define TAGIS(X, Y) (!xmlStrcmp((X)->name, (const xmlChar *)(Y)))
/*
 * Free the memory used by a single article.
 *
 * Releases the title, link and description strings and then the item
 * itself. Only this one item is released: item->next is not followed, so
 * the caller must save it first when walking a list.
 *
 * Passing NULL is a no-op, mirroring free().
 */
void
freeItem(itemStruct *item)
{
	if (!item)
		return;

	// free(NULL) is defined to do nothing, so unset fields need no guard.
	free(item->title);
	free(item->link);
	free(item->description);
	free(item);
}
static int
parseXml(xmlDocPtr doc,
2021-08-06 14:32:04 -04:00
const char *feedName,
void itemAction(itemStruct *, const char *))
2021-08-02 11:41:15 -04:00
{
2021-09-22 18:50:42 -04:00
// Parse the XML in a single document.
2021-08-02 11:41:15 -04:00
if (!feedName || !feedName[0]) {
logMsg(1, "Missing feed name, please set one.\n");
return 1;
}
2021-08-06 14:32:04 -04:00
xmlNodePtr rootNode;
2021-08-02 11:41:15 -04:00
2021-08-06 14:32:04 -04:00
rootNode = xmlDocGetRootElement(doc);
2021-08-02 11:41:15 -04:00
2021-08-06 14:32:04 -04:00
if (!rootNode) {
logMsg(1, "Empty document for feed. Skipping...\n");
return 1;
}
2021-08-02 11:41:15 -04:00
2021-08-06 14:32:04 -04:00
if (!TAGIS(rootNode, "rss")) {
logMsg(1, "XML document is not an RSS feed. Skipping...\n");
return 1;
}
2021-08-02 11:41:15 -04:00
2021-08-06 14:32:04 -04:00
xmlChar *key;
2021-08-02 11:41:15 -04:00
2021-08-06 14:32:04 -04:00
// Get channel XML tag
xmlNodePtr channel = rootNode->children;
2021-08-02 11:41:15 -04:00
while(channel && !TAGIS(channel, "channel"))
channel = channel->next;
2021-08-06 14:32:04 -04:00
if (!channel || !TAGIS(channel, "channel")) {
logMsg(1, "Invalid RSS syntax. Skipping...\n");
}
2021-08-02 11:41:15 -04:00
2021-09-22 18:50:42 -04:00
// Pointer to an article xml tag
2021-08-06 14:32:04 -04:00
xmlNodePtr cur = channel->children;
2021-08-02 11:41:15 -04:00
itemStruct *prev = NULL;
2021-08-06 14:32:04 -04:00
while (cur) {
2021-08-02 11:41:15 -04:00
2021-08-06 14:32:04 -04:00
key = xmlNodeListGetString(doc, cur->children, 1);
2021-08-02 11:41:15 -04:00
2021-08-06 14:32:04 -04:00
if (TAGIS(cur, "item")) {
2021-08-02 11:41:15 -04:00
itemStruct *item = ecalloc(1, sizeof(itemStruct));
2021-09-22 18:50:42 -04:00
// Build a linked list of item structs to pass to itemAction()
2021-08-02 11:41:15 -04:00
item->next = prev;
prev = item;
xmlNodePtr itemNode = cur->children;
while (itemNode) {
char *itemKey = (char *)xmlNodeListGetString(doc, itemNode->children, 1);
char *attKeys[] = {
"title",
"link",
"description",
};
char **atts[] = {
&item->title,
&item->link,
&item->description,
};
if (itemKey) {
for (unsigned long int i = 0; i < LEN(attKeys); i++) {
if (TAGIS(itemNode, attKeys[i])) {
*atts[i] = ecalloc(strlen(itemKey) + 1, sizeof(char));
strcpy(*atts[i], itemKey);
}
}
xmlFree(itemKey);
}
itemNode = itemNode->next;
}
2021-08-06 14:32:04 -04:00
}
2021-08-02 11:41:15 -04:00
2021-08-06 14:32:04 -04:00
xmlFree(key);
cur = cur->next;
}
2021-08-02 11:41:15 -04:00
2021-08-06 14:32:04 -04:00
errno = 0;
int stat = mkdir((const char* ) feedName, S_IRWXU);
2021-08-02 11:41:15 -04:00
2021-08-06 14:32:04 -04:00
if (!stat && errno && errno != EEXIST) {
logMsg(1, "Error creating directory for feed.\n");
return 1;
}
2021-08-02 11:41:15 -04:00
itemAction(prev, feedName);
2021-08-06 14:32:04 -04:00
return 0;
2021-08-02 11:41:15 -04:00
}
/*
 * Initialize the XML document, read it, then free it.
 *
 * content    - NUL-terminated XML text of the feed
 * feedName   - name of the feed, forwarded to parseXml()
 * itemAction - callback forwarded to parseXml()
 *
 * Returns 1 when content is NULL, too large for the parser API, or not
 * parseable as XML; otherwise returns the result of parseXml().
 */
int
readDoc(char *content,
        const char *feedName,
        void itemAction(itemStruct *, const char *))
{
	if (!content) {
		logMsg(1, "Missing feed content.\n");
		return 1;
	}

	size_t length = strlen(content);

	// xmlReadMemory() takes the buffer size as an int, so refuse input
	// that would not fit rather than pass a truncated length.
	if (length > (size_t)INT_MAX) {
		logMsg(1, "Feed content too large to parse.\n");
		return 1;
	}

	xmlDocPtr doc = xmlReadMemory(content, (int)length, "noname.xml", NULL, 0);

	if (!doc) {
		logMsg(1, "XML parser error.\n");
		return 1;
	}

	int status = parseXml(doc, feedName, itemAction);

	xmlFreeDoc(doc);

	return status;
}