2022-04-09 17:46:16 -04:00
|
|
|
/*

This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/.

© 2021 dogeystamp <dogeystamp@disroot.org>

*/
|
|
|
|
|
2021-08-02 11:41:15 -04:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <string.h>
|
2023-01-10 02:05:18 +00:00
|
|
|
#include <time.h>
|
|
|
|
#include <utime.h>
|
2022-10-27 20:36:51 -04:00
|
|
|
#include <libxml/parser.h>
|
|
|
|
#include <libxml/tree.h>
|
|
|
|
#include <libxml/xmlreader.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <errno.h>
|
2021-08-02 11:41:15 -04:00
|
|
|
|
|
|
|
#include "util.h"
|
|
|
|
#include "net.h"
|
2022-10-27 21:17:24 -04:00
|
|
|
#include "handlers.h"
|
|
|
|
#include "config.h"
|
2022-10-27 20:36:51 -04:00
|
|
|
|
2022-10-30 13:44:05 -04:00
|
|
|
static inline int
|
|
|
|
tagIs(xmlNodePtr node, char *str)
|
|
|
|
{
|
|
|
|
return !xmlStrcmp(node->name, (const xmlChar *) str);
|
|
|
|
}
|
2022-10-27 20:36:51 -04:00
|
|
|
|
|
|
|
static int
|
|
|
|
parseXml(xmlDocPtr doc,
|
|
|
|
const char *feedName,
|
|
|
|
void itemAction(itemStruct *, const char *))
|
|
|
|
{
|
|
|
|
// Parse the XML in a single document.
|
|
|
|
|
|
|
|
if (!feedName || !feedName[0]) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Missing feed name, please set one.\n");
|
2022-10-27 20:36:51 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
xmlNodePtr rootNode;
|
|
|
|
|
|
|
|
rootNode = xmlDocGetRootElement(doc);
|
|
|
|
|
|
|
|
if (!rootNode) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Empty document for feed.\n");
|
2022-10-27 20:36:51 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
enum feedFormat format = NONE;
|
|
|
|
|
2022-10-30 13:44:05 -04:00
|
|
|
if (tagIs(rootNode, "rss")) {
|
2022-10-27 20:36:51 -04:00
|
|
|
format = RSS;
|
2022-10-30 13:44:05 -04:00
|
|
|
} else if (tagIs(rootNode, "feed")) {
|
2022-10-27 20:36:51 -04:00
|
|
|
if (!xmlStrcmp(rootNode->ns->href, (const xmlChar *) "http://www.w3.org/2005/Atom"))
|
|
|
|
format = ATOM;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (format == NONE) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "XML document is not an RSS or Atom feed.\n");
|
2022-10-27 20:36:51 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pointer to the first child of the root XML node
|
|
|
|
xmlNodePtr cur = rootNode->children;
|
|
|
|
|
|
|
|
switch (format) {
|
|
|
|
case RSS:
|
|
|
|
// Get channel XML tag
|
2022-10-30 13:44:05 -04:00
|
|
|
while(cur && !tagIs(cur, "channel"))
|
2022-10-27 20:36:51 -04:00
|
|
|
cur = cur->next;
|
|
|
|
|
2022-10-30 13:44:05 -04:00
|
|
|
if (!cur || !tagIs(cur, "channel")) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Invalid RSS syntax.\n");
|
2022-10-27 20:36:51 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set cur to child of channel
|
|
|
|
cur = cur->children;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ATOM:
|
|
|
|
// Set cur to child of feed
|
|
|
|
cur = rootNode->children;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Missing starting tag for format\n");
|
2022-10-27 20:36:51 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Previous item (to build a linked list later)
|
|
|
|
itemStruct *prev = NULL;
|
|
|
|
|
|
|
|
// Loop over articles (skipping non-article tags)
|
|
|
|
while (cur) {
|
|
|
|
|
|
|
|
short isArticle = 0;
|
|
|
|
|
|
|
|
switch (format) {
|
|
|
|
case RSS:
|
2022-10-30 13:44:05 -04:00
|
|
|
isArticle = tagIs(cur, "item");
|
2022-10-27 20:36:51 -04:00
|
|
|
break;
|
|
|
|
case ATOM:
|
2022-10-30 13:44:05 -04:00
|
|
|
isArticle = tagIs(cur, "entry");
|
2022-10-27 20:36:51 -04:00
|
|
|
break;
|
|
|
|
default:
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Missing article tag name for format\n");
|
2022-10-27 20:36:51 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isArticle) {
|
|
|
|
itemStruct *item = ecalloc(1, sizeof(itemStruct));
|
|
|
|
|
|
|
|
// Build a linked list of item structs to pass to itemAction()
|
|
|
|
item->next = prev;
|
|
|
|
prev = item;
|
|
|
|
|
|
|
|
xmlNodePtr itemNode = cur->children;
|
|
|
|
|
|
|
|
// Value within the tag
|
|
|
|
char *itemKey;
|
|
|
|
|
|
|
|
while (itemNode) {
|
|
|
|
itemKey = (char *)xmlNodeListGetString(doc, itemNode->children, 1);
|
|
|
|
|
2022-10-29 17:48:36 -04:00
|
|
|
switch (format) {
|
|
|
|
case RSS:
|
2022-10-30 13:44:05 -04:00
|
|
|
if (tagIs(itemNode, "link"))
|
2022-10-29 17:48:36 -04:00
|
|
|
copyField(item, FIELD_LINK, itemKey);
|
2022-10-30 13:44:05 -04:00
|
|
|
else if (tagIs(itemNode, "description"))
|
2022-10-29 17:48:36 -04:00
|
|
|
copyField(item, FIELD_DESCRIPTION, itemKey);
|
2022-10-30 13:44:05 -04:00
|
|
|
else if (tagIs(itemNode, "title"))
|
2022-10-29 17:48:36 -04:00
|
|
|
copyField(item, FIELD_TITLE, itemKey);
|
2022-10-30 13:44:05 -04:00
|
|
|
else if (tagIs(itemNode, "enclosure"))
|
2022-10-29 17:48:36 -04:00
|
|
|
rssEnclosure(item, itemNode);
|
|
|
|
break;
|
|
|
|
case ATOM:
|
2022-10-30 13:44:05 -04:00
|
|
|
if (tagIs(itemNode, "link"))
|
2022-10-29 17:48:36 -04:00
|
|
|
atomLink(item, itemNode);
|
2022-10-30 13:44:05 -04:00
|
|
|
else if (tagIs(itemNode, "content"))
|
2022-10-29 17:48:36 -04:00
|
|
|
copyField(item, FIELD_DESCRIPTION, itemKey);
|
2022-10-30 13:44:05 -04:00
|
|
|
else if (tagIs(itemNode, "title"))
|
2022-10-29 17:48:36 -04:00
|
|
|
copyField(item, FIELD_TITLE, itemKey);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
2022-10-27 20:36:51 -04:00
|
|
|
}
|
|
|
|
|
2022-10-29 17:48:36 -04:00
|
|
|
xmlFree(itemKey);
|
|
|
|
|
2022-10-27 20:36:51 -04:00
|
|
|
itemNode = itemNode->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cur = cur->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
errno = 0;
|
|
|
|
int stat = mkdir((const char* ) feedName, S_IRWXU);
|
|
|
|
|
|
|
|
if (!stat && errno && errno != EEXIST) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Error creating directory for feed.\n");
|
2022-10-27 20:36:51 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
itemAction(prev, feedName);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
readDoc(char *content,
|
|
|
|
const char *feedName,
|
|
|
|
void itemAction(itemStruct *, const char *))
|
|
|
|
{
|
|
|
|
// Initialize the XML document, read it, then free it
|
|
|
|
|
|
|
|
xmlDocPtr doc;
|
|
|
|
|
|
|
|
doc = xmlReadMemory(content, strlen(content), "noname.xml", NULL, 0);
|
|
|
|
if (!doc) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "XML parser error.\n");
|
2022-10-27 20:36:51 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int stat = parseXml(doc, feedName, itemAction);
|
|
|
|
|
|
|
|
if (stat)
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Skipped feed %s due to errors.\n", feedName);
|
2022-10-27 20:36:51 -04:00
|
|
|
|
|
|
|
xmlFreeDoc(doc);
|
|
|
|
|
|
|
|
return stat;
|
|
|
|
}
|
2021-08-02 11:41:15 -04:00
|
|
|
|
|
|
|
int
|
2022-05-23 12:27:00 -04:00
|
|
|
main(int argc, char *argv[])
|
2021-08-02 11:41:15 -04:00
|
|
|
{
|
2021-08-02 13:55:46 -04:00
|
|
|
if (argc == 2 && !strcmp("-v", argv[1]))
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_FATAL, "MinRSS %s\n", VERSION);
|
2021-08-02 13:55:46 -04:00
|
|
|
else if (argc != 1)
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_FATAL, "Usage: minrss [-v]\n");
|
2021-08-02 13:55:46 -04:00
|
|
|
|
2021-08-06 14:32:04 -04:00
|
|
|
unsigned int i = 0;
|
2021-08-02 11:41:15 -04:00
|
|
|
|
2021-08-06 14:32:04 -04:00
|
|
|
initCurl();
|
2021-08-02 11:41:15 -04:00
|
|
|
|
2021-08-06 14:32:04 -04:00
|
|
|
outputStruct outputs[LEN(links)];
|
|
|
|
memset(outputs, 0, sizeof(outputs));
|
2021-08-02 11:41:15 -04:00
|
|
|
|
2023-01-10 02:05:18 +00:00
|
|
|
time_t timeNow = time(NULL);
|
|
|
|
|
2021-08-06 14:32:04 -04:00
|
|
|
for (i = 0; i < LEN(links); i++) {
|
2023-01-10 02:05:18 +00:00
|
|
|
struct stat feedDir;
|
|
|
|
|
2021-08-02 11:41:15 -04:00
|
|
|
if (links[0].url[0] == '\0')
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_FATAL, "No feeds, add them in config.def.h\n");
|
2021-08-02 11:41:15 -04:00
|
|
|
|
2023-01-10 02:05:18 +00:00
|
|
|
if (stat(links[i].feedName, &feedDir) == 0) {
|
2023-01-15 12:33:27 -05:00
|
|
|
time_t deltaTime = timeNow - feedDir.st_atime;
|
2023-01-10 02:05:18 +00:00
|
|
|
if (deltaTime < links[i].update)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_VERBOSE, "Requesting %s\n", links[i].url);
|
2021-08-06 14:32:04 -04:00
|
|
|
createRequest(links[i].url, &outputs[i]);
|
|
|
|
}
|
2021-08-02 11:41:15 -04:00
|
|
|
|
2021-08-06 14:32:04 -04:00
|
|
|
performRequests(finish);
|
2021-08-02 11:41:15 -04:00
|
|
|
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_INFO, "Finished downloads.\n");
|
2021-08-02 11:41:15 -04:00
|
|
|
|
2021-08-06 14:32:04 -04:00
|
|
|
for (i = 0; i < LEN(links); i++) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_VERBOSE, "Parsing %s\n", links[i].url);
|
2021-08-02 11:41:15 -04:00
|
|
|
|
|
|
|
if (outputs[i].buffer && outputs[i].buffer[0]) {
|
2023-01-10 02:05:18 +00:00
|
|
|
if (readDoc(outputs[i].buffer, links[i].feedName, itemAction) == 0) {
|
|
|
|
struct stat feedDir;
|
|
|
|
|
|
|
|
if (stat(links[i].feedName, &feedDir) == 0) {
|
|
|
|
struct utimbuf update;
|
|
|
|
|
|
|
|
update.actime = timeNow;
|
2023-01-15 12:33:27 -05:00
|
|
|
update.modtime = feedDir.st_mtime;
|
2023-01-10 02:05:18 +00:00
|
|
|
utime(links[i].feedName, &update);
|
|
|
|
}
|
|
|
|
}
|
2021-08-02 11:41:15 -04:00
|
|
|
free(outputs[i].buffer);
|
|
|
|
}
|
2021-08-06 14:32:04 -04:00
|
|
|
}
|
2021-08-02 11:41:15 -04:00
|
|
|
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_INFO, "Finished parsing feeds.\n");
|
2021-08-02 11:41:15 -04:00
|
|
|
|
2021-08-06 14:32:04 -04:00
|
|
|
return 0;
|
2021-08-02 11:41:15 -04:00
|
|
|
}
|