Compare commits
11 Commits
5d2ab702f9
...
92dad5b3dc
Author | SHA1 | Date | |
---|---|---|---|
92dad5b3dc | |||
d41a612d9d | |||
1df7e7cbc7 | |||
0e81a0b5e2 | |||
418a835857 | |||
036206d2be | |||
9df7ce934c | |||
0ebb379a64 | |||
73b0cad54f | |||
453e36317f | |||
04eaae7230 |
19
Makefile
19
Makefile
@ -1,15 +1,20 @@
|
|||||||
PREFIX = ~/.local
|
PREFIX = ~/.local
|
||||||
VERSION = 0.1
|
VERSION = 0.2
|
||||||
|
|
||||||
# CC = cc
|
|
||||||
SRC = minrss.c util.c net.c xml.c
|
|
||||||
OBJ = $(SRC:.c=.o)
|
|
||||||
PKG_CONFIG = pkg-config
|
PKG_CONFIG = pkg-config
|
||||||
|
|
||||||
|
# Comment out if JSON output support isn't needed
|
||||||
|
JSONLIBS = `$(PKG_CONFIG) --libs json-c`
|
||||||
|
JSONINCS = `$(PKG_CONFIG) --cflags json-c`
|
||||||
|
JSONFLAG = -DJSON
|
||||||
|
|
||||||
CURL_CONFIG = curl-config
|
CURL_CONFIG = curl-config
|
||||||
INCS = `$(PKG_CONFIG) --cflags libxml-2.0` `$(CURL_CONFIG) --cflags`
|
SRC = minrss.c util.c net.c handlers.c
|
||||||
LIBS = `$(PKG_CONFIG) --libs libxml-2.0` `$(CURL_CONFIG) --libs`
|
OBJ = $(SRC:.c=.o)
|
||||||
|
# Header search paths for libxml2, libcurl and (when enabled above) json-c.
INCS = `$(PKG_CONFIG) --cflags libxml-2.0` `$(CURL_CONFIG) --cflags` $(JSONINCS)
|
||||||
|
LIBS = `$(PKG_CONFIG) --libs libxml-2.0` `$(CURL_CONFIG) --libs` $(JSONLIBS)
|
||||||
WARN = -Wall -Wpedantic -Wextra
|
WARN = -Wall -Wpedantic -Wextra
|
||||||
CFLAGS = $(INCS) $(LIBS) $(WARN) -DVERSION=\"$(VERSION)\"
|
CFLAGS = $(INCS) $(LIBS) $(WARN) -DVERSION=\"$(VERSION)\" $(JSONFLAG)
|
||||||
|
|
||||||
all: config.h minrss
|
all: config.h minrss
|
||||||
|
|
||||||
|
11
README
11
README
@ -1,8 +1,10 @@
|
|||||||
MinRSS
|
MinRSS
|
||||||
======
|
======
|
||||||
MinRSS is an RSS/Atom feed reader for Linux inspired by suckless.org's
|
MinRSS is an RSS/Atom feed reader for Linux inspired by suckless.org's IRC
|
||||||
IRC clients ii and sic. Instead of presenting articles as entries
|
clients ii and sic. Instead of presenting articles as entries in a menu, it
|
||||||
in a menu, it saves them as files in folders.
|
saves them as files in folders.
|
||||||
|
|
||||||
|
These files can either be formatted as HTML, or as JSON to help with scripting.
|
||||||
|
|
||||||
rss
|
rss
|
||||||
|--news
|
|--news
|
||||||
@ -16,6 +18,9 @@ Requirements
|
|||||||
------------
|
------------
|
||||||
You need libcurl and libxml2 to compile MinRSS.
|
You need libcurl and libxml2 to compile MinRSS.
|
||||||
|
|
||||||
|
json-c is required for JSON output. To disable this feature, comment out the
|
||||||
|
relevant lines in Makefile.
|
||||||
|
|
||||||
Installation
|
Installation
|
||||||
------------
|
------------
|
||||||
Run this command to build MinRSS:
|
Run this command to build MinRSS:
|
||||||
|
12
config.def.h
12
config.def.h
@ -21,7 +21,6 @@ typedef struct {
|
|||||||
.url = "https://example.com/rss/feed.rss",
|
.url = "https://example.com/rss/feed.rss",
|
||||||
// This will be used as the folder name for the feed.
|
// This will be used as the folder name for the feed.
|
||||||
.feedName = "examplefeed",
|
.feedName = "examplefeed",
|
||||||
.tags = "test example sample"
|
|
||||||
},
|
},
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -50,5 +49,12 @@ static const int maxRedirs = 10;
|
|||||||
// For more information: https://curl.se/libcurl/c/CURLOPT_PROTOCOLS.html
|
// For more information: https://curl.se/libcurl/c/CURLOPT_PROTOCOLS.html
|
||||||
static const int curlProtocols = CURLPROTO_HTTPS | CURLPROTO_HTTP;
|
static const int curlProtocols = CURLPROTO_HTTPS | CURLPROTO_HTTP;
|
||||||
|
|
||||||
// File extension used for each article.
|
enum outputFormats {
|
||||||
static const char fileExt[] = ".html";
|
OUTPUT_HTML,
|
||||||
|
#ifdef JSON
|
||||||
|
OUTPUT_JSON,
|
||||||
|
#endif // JSON
|
||||||
|
};
|
||||||
|
|
||||||
|
// When saving, sets the format of the saved file.
|
||||||
|
static const enum outputFormats outputFormat = OUTPUT_HTML;
|
||||||
|
239
handlers.c
Normal file
239
handlers.c
Normal file
@ -0,0 +1,239 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <libxml/parser.h>
|
||||||
|
#include <libxml/tree.h>
|
||||||
|
#include <libxml/xmlreader.h>
|
||||||
|
#ifdef JSON
|
||||||
|
#include <json-c/json.h>
|
||||||
|
#endif // JSON
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "util.h"
|
||||||
|
#include "handlers.h"
|
||||||
|
|
||||||
|
void
|
||||||
|
freeItem(itemStruct *item)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < FIELD_END; i++) {
|
||||||
|
if (item->fields[i])
|
||||||
|
free(item->fields[i]);
|
||||||
|
}
|
||||||
|
free(item);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
propIs(xmlChar *prop, char *name)
|
||||||
|
{
|
||||||
|
return !xmlStrcmp(prop, (const xmlChar *) name);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store a newly allocated copy of str in *field.
// Any previous value of *field is overwritten without being freed.
// A NULL str leaves *field untouched: callers pass on whatever the XML
// library returned for a tag, and that may be NULL when there is no text.
static void
allocField(char **field, char *str)
{
	if (!str)
		return;

	// +1 so the null terminator is copied as well
	size_t len = strlen(str) + 1;
	char *fieldStr = ecalloc(len, sizeof(char));

	memcpy(fieldStr, str, len * sizeof(char));
	*field = fieldStr;
}
|
||||||
|
|
||||||
|
void
|
||||||
|
copyField(itemStruct *item, enum fields field, char *str)
|
||||||
|
{
|
||||||
|
allocField(&item->fields[field], str);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
atomLink(itemStruct *item, xmlNodePtr node)
|
||||||
|
{
|
||||||
|
xmlChar *href = xmlGetProp(node, (xmlChar *) "href");
|
||||||
|
xmlChar *rel = xmlGetProp(node, (xmlChar *) "rel");
|
||||||
|
|
||||||
|
if (!href) {
|
||||||
|
logMsg(1, "Invalid link tag.\n");
|
||||||
|
if (rel)
|
||||||
|
xmlFree(rel);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!rel || propIs(rel, "alternate")) {
|
||||||
|
copyField(item, FIELD_LINK, (char *)href);
|
||||||
|
} else if (propIs(rel, "enclosure")) {
|
||||||
|
copyField(item, FIELD_ENCLOSURE_URL, (char *)href);
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlFree(href);
|
||||||
|
xmlFree(rel);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
rssEnclosure(itemStruct *item, xmlNodePtr node)
|
||||||
|
{
|
||||||
|
xmlChar *href = xmlGetProp(node, (xmlChar *) "url");
|
||||||
|
if (!href) {
|
||||||
|
logMsg(1, "Invalid enclosure URL.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
copyField(item, FIELD_ENCLOSURE_URL, (char *)href);
|
||||||
|
|
||||||
|
xmlFree(href);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open [folder]/[fileName][fileExt] in append mode and return the stream.
//
// Sanitizing the names is the caller's responsibility.
// Ownership: fileName is freed here once the path has been built. On the
// early NULL returns below (NULL argument or empty fileName) it is NOT freed.
//
// Returns NULL when an argument is NULL, when fileName is empty, or when
// fopen() fails.
FILE *
openFile(const char *folder, char *fileName, char *fileExt)
{
	if (!folder) {
		logMsg(1, "NULL folder\n");
		return NULL;
	} else if (!fileName) {
		logMsg(1, "NULL file base name\n");
		return NULL;
	} else if (!fileExt) {
		logMsg(1, "NULL file extension\n");
		return NULL;
	}

	if (!fileName[0]) {
		logMsg(1, "Invalid filename.\n");
		return NULL;
	}

	// +1 for path separator and +1 for null terminator
	size_t pathLen = strlen(folder) + 1 + strlen(fileName) + strlen(fileExt) + 1;
	char *filePath = ecalloc(pathLen, sizeof(char));

	// One bounded format call instead of three offset memcpy() calls
	snprintf(filePath, pathLen, "%s%c%s%s", folder, fsep(), fileName, fileExt);

	FILE *itemFile = fopen(filePath, "a");

	free(filePath);
	free(fileName);

	return itemFile;
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
outputHtml(itemStruct *item, FILE *f)
|
||||||
|
{
|
||||||
|
if (item->fields[FIELD_TITLE])
|
||||||
|
fprintf(f, "<h1>%s</h1><br>\n", item->fields[FIELD_TITLE]);
|
||||||
|
if (item->fields[FIELD_LINK])
|
||||||
|
fprintf(f, "<a href=\"%s\">Link</a><br>\n", item->fields[FIELD_LINK]);
|
||||||
|
if (item->fields[FIELD_ENCLOSURE_URL])
|
||||||
|
fprintf(f, "<a href=\"%s\">Enclosure</a><br>\n", item->fields[FIELD_ENCLOSURE_URL]);
|
||||||
|
if (item->fields[FIELD_DESCRIPTION])
|
||||||
|
fprintf(f, "%s", item->fields[FIELD_DESCRIPTION]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef JSON
// Write an article to f as one JSON object with the keys "title", "link",
// "enclosure" (an object holding "link") and "description".
// Keys whose field is unset are left out.
static void
outputJson(itemStruct *item, FILE *f)
{
	char *title = item->fields[FIELD_TITLE];
	char *link = item->fields[FIELD_LINK];
	char *enclosureUrl = item->fields[FIELD_ENCLOSURE_URL];
	char *description = item->fields[FIELD_DESCRIPTION];

	json_object *article = json_object_new_object();

	if (title) {
		json_object *value = json_object_new_string(title);
		json_object_object_add(article, "title", value);
	}

	if (link) {
		json_object *value = json_object_new_string(link);
		json_object_object_add(article, "link", value);
	}

	if (enclosureUrl) {
		// The enclosure is nested in its own object
		json_object *nested = json_object_new_object();
		json_object *value = json_object_new_string(enclosureUrl);

		json_object_object_add(nested, "link", value);
		json_object_object_add(article, "enclosure", nested);
	}

	if (description) {
		json_object *value = json_object_new_string(description);
		json_object_object_add(article, "description", value);
	}

	fprintf(f, "%s", json_object_to_json_string_ext(article, 0));

	// Drop our reference to the root object now that it has been written
	json_object_put(article);
}
#endif // JSON
|
||||||
|
|
||||||
|
void
|
||||||
|
itemAction(itemStruct *item, const char *folder)
|
||||||
|
{
|
||||||
|
// Receives a linked list of articles to process.
|
||||||
|
|
||||||
|
itemStruct *cur = item;
|
||||||
|
itemStruct *prev;
|
||||||
|
|
||||||
|
unsigned long long int newItems = 0;
|
||||||
|
|
||||||
|
while (cur) {
|
||||||
|
prev = cur;
|
||||||
|
|
||||||
|
char fileExt[10];
|
||||||
|
void (*outputFunction)(itemStruct *, FILE *);
|
||||||
|
|
||||||
|
switch (outputFormat) {
|
||||||
|
case OUTPUT_HTML:
|
||||||
|
memcpy(fileExt, ".html", 6);
|
||||||
|
outputFunction = &outputHtml;
|
||||||
|
break;
|
||||||
|
#ifdef JSON
|
||||||
|
case OUTPUT_JSON:
|
||||||
|
memcpy(fileExt, ".json", 6);
|
||||||
|
outputFunction = &outputJson;
|
||||||
|
break;
|
||||||
|
#endif //JSON
|
||||||
|
|
||||||
|
default:
|
||||||
|
logMsg(0, "Output format is invalid.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
FILE *itemFile = openFile(folder, san(cur->fields[FIELD_TITLE]), fileExt);
|
||||||
|
|
||||||
|
// Do not overwrite files
|
||||||
|
if (!ftell(itemFile)) {
|
||||||
|
outputFunction(cur, itemFile);
|
||||||
|
newItems++;
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(itemFile);
|
||||||
|
cur = cur->next;
|
||||||
|
freeItem(prev);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (newItems)
|
||||||
|
logMsg(2, "%s : %d new articles\n", folder, newItems);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Executed after a download finishes; reports the outcome for url.
// A response code of 0 is reported as the site being unreachable.
void
finish(char *url, long responseCode)
{
	switch (responseCode) {
	case 200:
		logMsg(4, "Finished downloading %s\n", url);
		break;
	case 0:
		logMsg(1, "Can not reach %s: ensure the protocol is enabled and the site is accessible.\n", url);
		break;
	default:
		logMsg(1, "HTTP %ld for %s\n", responseCode, url);
		break;
	}
}
|
36
handlers.h
Normal file
36
handlers.h
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
enum feedFormat {
|
||||||
|
NONE,
|
||||||
|
RSS,
|
||||||
|
ATOM,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum fields {
|
||||||
|
FIELD_TITLE,
|
||||||
|
FIELD_LINK,
|
||||||
|
FIELD_DESCRIPTION,
|
||||||
|
FIELD_ENCLOSURE_URL,
|
||||||
|
|
||||||
|
FIELD_END
|
||||||
|
};
|
||||||
|
enum numFields {
|
||||||
|
// currently unimplemented
|
||||||
|
NUM_ENCLOSURE_SIZE,
|
||||||
|
|
||||||
|
NUM_END
|
||||||
|
};
|
||||||
|
typedef struct itemStruct itemStruct;
|
||||||
|
struct itemStruct {
|
||||||
|
char *fields[FIELD_END];
|
||||||
|
int numFields[NUM_END];
|
||||||
|
itemStruct *next;
|
||||||
|
};
|
||||||
|
|
||||||
|
void copyField(itemStruct *item, enum fields field, char *str);
|
||||||
|
|
||||||
|
void freeItem(itemStruct *item);
|
||||||
|
void itemAction(itemStruct *item, const char *folder);
|
||||||
|
void finish(char *url, long responseCode);
|
||||||
|
|
||||||
|
int rssEnclosure(itemStruct *item, xmlNodePtr node);
|
||||||
|
|
||||||
|
int atomLink(itemStruct *item, xmlNodePtr node);
|
217
minrss.c
217
minrss.c
@ -13,90 +13,191 @@ You should have received a copy of the GNU General Public License along with thi
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <libxml/parser.h>
|
||||||
|
#include <libxml/tree.h>
|
||||||
|
#include <libxml/xmlreader.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "net.h"
|
#include "net.h"
|
||||||
#include "xml.h"
|
#include "handlers.h"
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
void
|
static inline int
|
||||||
itemAction(itemStruct *item, const char *folder)
|
tagIs(xmlNodePtr node, char *str)
|
||||||
{
|
{
|
||||||
// Receives a link list of articles to process.
|
return !xmlStrcmp(node->name, (const xmlChar *) str);
|
||||||
|
}
|
||||||
|
|
||||||
itemStruct *cur = item;
|
static int
|
||||||
|
parseXml(xmlDocPtr doc,
|
||||||
|
const char *feedName,
|
||||||
|
void itemAction(itemStruct *, const char *))
|
||||||
|
{
|
||||||
|
// Parse the XML in a single document.
|
||||||
|
|
||||||
unsigned long long int newItems = 0;
|
if (!feedName || !feedName[0]) {
|
||||||
|
logMsg(1, "Missing feed name, please set one.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
size_t folderLen = strlen(folder);
|
xmlNodePtr rootNode;
|
||||||
size_t extLen = strlen(fileExt);
|
|
||||||
|
|
||||||
|
rootNode = xmlDocGetRootElement(doc);
|
||||||
|
|
||||||
|
if (!rootNode) {
|
||||||
|
logMsg(1, "Empty document for feed.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum feedFormat format = NONE;
|
||||||
|
|
||||||
|
if (tagIs(rootNode, "rss")) {
|
||||||
|
format = RSS;
|
||||||
|
} else if (tagIs(rootNode, "feed")) {
|
||||||
|
if (!xmlStrcmp(rootNode->ns->href, (const xmlChar *) "http://www.w3.org/2005/Atom"))
|
||||||
|
format = ATOM;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (format == NONE) {
|
||||||
|
logMsg(1, "XML document is not an RSS or Atom feed.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pointer to the first child of the root XML node
|
||||||
|
xmlNodePtr cur = rootNode->children;
|
||||||
|
|
||||||
|
switch (format) {
|
||||||
|
case RSS:
|
||||||
|
// Get channel XML tag
|
||||||
|
while(cur && !tagIs(cur, "channel"))
|
||||||
|
cur = cur->next;
|
||||||
|
|
||||||
|
if (!cur || !tagIs(cur, "channel")) {
|
||||||
|
logMsg(1, "Invalid RSS syntax.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set cur to child of channel
|
||||||
|
cur = cur->children;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ATOM:
|
||||||
|
// Set cur to child of feed
|
||||||
|
cur = rootNode->children;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
logMsg(1, "Missing starting tag for format\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Previous item (to build a linked list later)
|
||||||
|
itemStruct *prev = NULL;
|
||||||
|
|
||||||
|
// Loop over articles (skipping non-article tags)
|
||||||
while (cur) {
|
while (cur) {
|
||||||
char *filePath;
|
|
||||||
char *fileName = san(cur->title, 1);
|
|
||||||
size_t fileNameLen = strlen(fileName);
|
|
||||||
|
|
||||||
itemStruct *prev = cur;
|
short isArticle = 0;
|
||||||
|
|
||||||
|
switch (format) {
|
||||||
|
case RSS:
|
||||||
|
isArticle = tagIs(cur, "item");
|
||||||
|
break;
|
||||||
|
case ATOM:
|
||||||
|
isArticle = tagIs(cur, "entry");
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
logMsg(1, "Missing article tag name for format\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
// +1 for null terminator and +1 for path separator
|
if (isArticle) {
|
||||||
size_t pathLen = folderLen + fileNameLen + extLen + 2;
|
itemStruct *item = ecalloc(1, sizeof(itemStruct));
|
||||||
|
|
||||||
if (fileName[0])
|
// Build a linked list of item structs to pass to itemAction()
|
||||||
filePath = ecalloc(pathLen, sizeof(char));
|
item->next = prev;
|
||||||
else {
|
prev = item;
|
||||||
logMsg(1, "Invalid article title.\n");
|
|
||||||
|
xmlNodePtr itemNode = cur->children;
|
||||||
|
|
||||||
|
// Value within the tag
|
||||||
|
char *itemKey;
|
||||||
|
|
||||||
|
while (itemNode) {
|
||||||
|
itemKey = (char *)xmlNodeListGetString(doc, itemNode->children, 1);
|
||||||
|
|
||||||
|
switch (format) {
|
||||||
|
case RSS:
|
||||||
|
if (tagIs(itemNode, "link"))
|
||||||
|
copyField(item, FIELD_LINK, itemKey);
|
||||||
|
else if (tagIs(itemNode, "description"))
|
||||||
|
copyField(item, FIELD_DESCRIPTION, itemKey);
|
||||||
|
else if (tagIs(itemNode, "title"))
|
||||||
|
copyField(item, FIELD_TITLE, itemKey);
|
||||||
|
else if (tagIs(itemNode, "enclosure"))
|
||||||
|
rssEnclosure(item, itemNode);
|
||||||
|
break;
|
||||||
|
case ATOM:
|
||||||
|
if (tagIs(itemNode, "link"))
|
||||||
|
atomLink(item, itemNode);
|
||||||
|
else if (tagIs(itemNode, "content"))
|
||||||
|
copyField(item, FIELD_DESCRIPTION, itemKey);
|
||||||
|
else if (tagIs(itemNode, "title"))
|
||||||
|
copyField(item, FIELD_TITLE, itemKey);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlFree(itemKey);
|
||||||
|
|
||||||
|
itemNode = itemNode->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cur = cur->next;
|
cur = cur->next;
|
||||||
freeItem(prev);
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(filePath, folder, folderLen * sizeof(char));
|
errno = 0;
|
||||||
|
int stat = mkdir((const char* ) feedName, S_IRWXU);
|
||||||
|
|
||||||
filePath[folderLen] = fsep();
|
if (!stat && errno && errno != EEXIST) {
|
||||||
filePath[pathLen - 1] = '\0';
|
logMsg(1, "Error creating directory for feed.\n");
|
||||||
|
return 1;
|
||||||
memcpy(filePath + folderLen + 1, fileName, fileNameLen * sizeof(char));
|
|
||||||
memcpy(filePath + pathLen - extLen - 1, fileExt, extLen * sizeof(char));
|
|
||||||
|
|
||||||
FILE *itemFile = fopen(filePath, "a");
|
|
||||||
|
|
||||||
free(filePath);
|
|
||||||
free(fileName);
|
|
||||||
|
|
||||||
|
|
||||||
// Do not overwrite files
|
|
||||||
if (!ftell(itemFile)) {
|
|
||||||
newItems++;
|
|
||||||
|
|
||||||
fprintf(itemFile, "<h1>%s</h1><br>\n", cur->title);
|
|
||||||
fprintf(itemFile, "<a href=\"%s\">Link</a><br>\n", san(cur->link, 0));
|
|
||||||
fprintf(itemFile, "%s", san(cur->description, 0));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose(itemFile);
|
itemAction(prev, feedName);
|
||||||
|
|
||||||
cur = cur->next;
|
return 0;
|
||||||
freeItem(prev);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newItems)
|
int
|
||||||
logMsg(2, "%s : %d new articles\n", folder, newItems);
|
readDoc(char *content,
|
||||||
}
|
const char *feedName,
|
||||||
|
void itemAction(itemStruct *, const char *))
|
||||||
void
|
|
||||||
finish(char *url, long responseCode)
|
|
||||||
{
|
{
|
||||||
// Executed after a download finishes
|
// Initialize the XML document, read it, then free it
|
||||||
|
|
||||||
if (responseCode == 200)
|
xmlDocPtr doc;
|
||||||
logMsg(4, "Finished downloading %s\n", url);
|
|
||||||
else if (!responseCode)
|
doc = xmlReadMemory(content, strlen(content), "noname.xml", NULL, 0);
|
||||||
logMsg(1, "Can not reach %s: ensure the protocol is enabled and the site is accessible.\n", url);
|
if (!doc) {
|
||||||
else
|
logMsg(1, "XML parser error.\n");
|
||||||
logMsg(1, "HTTP %ld for %s\n", responseCode, url);
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int stat = parseXml(doc, feedName, itemAction);
|
||||||
|
|
||||||
|
if (stat)
|
||||||
|
logMsg(1, "Skipped feed %s due to errors.\n", feedName);
|
||||||
|
|
||||||
|
xmlFreeDoc(doc);
|
||||||
|
|
||||||
|
return stat;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
4
util.c
4
util.c
@ -61,12 +61,10 @@ erealloc(void *p, size_t size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
char *
|
char *
|
||||||
san(char *str, int rep)
|
san(char *str)
|
||||||
{
|
{
|
||||||
if (!str)
|
if (!str)
|
||||||
return "";
|
return "";
|
||||||
if (!rep)
|
|
||||||
return str;
|
|
||||||
|
|
||||||
unsigned long long int len = strlen(str);
|
unsigned long long int len = strlen(str);
|
||||||
unsigned long long int offset = 0;
|
unsigned long long int offset = 0;
|
||||||
|
2
util.h
2
util.h
@ -14,5 +14,5 @@ You should have received a copy of the GNU General Public License along with thi
|
|||||||
void logMsg(int argc, char *msg, ...);
|
void logMsg(int argc, char *msg, ...);
|
||||||
void *ecalloc(size_t nmemb, size_t size);
|
void *ecalloc(size_t nmemb, size_t size);
|
||||||
void *erealloc(void *p, size_t nmemb);
|
void *erealloc(void *p, size_t nmemb);
|
||||||
char *san(char *str, int rep);
|
char *san(char *str);
|
||||||
char fsep();
|
char fsep();
|
||||||
|
258
xml.c
258
xml.c
@ -1,258 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/.
|
|
||||||
|
|
||||||
© 2021 dogeystamp <dogeystamp@disroot.org>
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <libxml/parser.h>
|
|
||||||
#include <libxml/tree.h>
|
|
||||||
#include <libxml/xmlreader.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "util.h"
|
|
||||||
#include "xml.h"
|
|
||||||
|
|
||||||
#define TAGIS(X, Y) (!xmlStrcmp(X->name, (const xmlChar *) Y))
|
|
||||||
|
|
||||||
void
|
|
||||||
freeItem(itemStruct *item)
|
|
||||||
{
|
|
||||||
// Free the memory used by an article.
|
|
||||||
|
|
||||||
char **mems[] = {
|
|
||||||
&item->title,
|
|
||||||
&item->link,
|
|
||||||
&item->description,
|
|
||||||
};
|
|
||||||
|
|
||||||
for (unsigned long int i = 0; i < LEN(mems); i++) {
|
|
||||||
if (*mems[i])
|
|
||||||
free(*mems[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
free(item);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
parseXml(xmlDocPtr doc,
|
|
||||||
const char *feedName,
|
|
||||||
void itemAction(itemStruct *, const char *))
|
|
||||||
{
|
|
||||||
// Parse the XML in a single document.
|
|
||||||
|
|
||||||
if (!feedName || !feedName[0]) {
|
|
||||||
logMsg(1, "Missing feed name, please set one.\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
xmlNodePtr rootNode;
|
|
||||||
|
|
||||||
rootNode = xmlDocGetRootElement(doc);
|
|
||||||
|
|
||||||
if (!rootNode) {
|
|
||||||
logMsg(1, "Empty document for feed.\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
enum feedFormat format = none;
|
|
||||||
|
|
||||||
if (TAGIS(rootNode, "rss")) {
|
|
||||||
format = rss;
|
|
||||||
} else if (TAGIS(rootNode, "feed")) {
|
|
||||||
if (!xmlStrcmp(rootNode->ns->href, (const xmlChar *) "http://www.w3.org/2005/Atom"))
|
|
||||||
format = atom;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (format == none) {
|
|
||||||
logMsg(1, "XML document is not an RSS or Atom feed.\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pointer to the first child of the root XML node
|
|
||||||
xmlNodePtr cur = rootNode->children;
|
|
||||||
|
|
||||||
switch (format) {
|
|
||||||
case rss:
|
|
||||||
// Get channel XML tag
|
|
||||||
while(cur && !TAGIS(cur, "channel"))
|
|
||||||
cur = cur->next;
|
|
||||||
|
|
||||||
if (!cur || !TAGIS(cur, "channel")) {
|
|
||||||
logMsg(1, "Invalid RSS syntax.\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set cur to child of channel
|
|
||||||
cur = cur->children;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case atom:
|
|
||||||
// Set cur to child of feed
|
|
||||||
cur = rootNode->children;
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
logMsg(1, "Missing starting tag for format\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Previous item (to build a linked list later)
|
|
||||||
itemStruct *prev = NULL;
|
|
||||||
|
|
||||||
// Loop over articles (skipping non-article tags)
|
|
||||||
while (cur) {
|
|
||||||
|
|
||||||
short isArticle = 0;
|
|
||||||
|
|
||||||
switch (format) {
|
|
||||||
case rss:
|
|
||||||
isArticle = TAGIS(cur, "item");
|
|
||||||
break;
|
|
||||||
case atom:
|
|
||||||
isArticle = TAGIS(cur, "entry");
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
logMsg(1, "Missing article tag name for format\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isArticle) {
|
|
||||||
itemStruct *item = ecalloc(1, sizeof(itemStruct));
|
|
||||||
|
|
||||||
// The selected set of attribute keys
|
|
||||||
char **attKeys;
|
|
||||||
|
|
||||||
// Struct variables to map attributes to
|
|
||||||
char **atts[] = {
|
|
||||||
&item->title,
|
|
||||||
&item->link,
|
|
||||||
&item->description,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Attribute keys for each format
|
|
||||||
|
|
||||||
char *attKeysRss[LEN(atts)] = {
|
|
||||||
"title",
|
|
||||||
"link",
|
|
||||||
"description",
|
|
||||||
};
|
|
||||||
|
|
||||||
char *attKeysAtom[LEN(atts)] = {
|
|
||||||
"title",
|
|
||||||
// link has special treatment because its value is in href not within the tag
|
|
||||||
"",
|
|
||||||
"content",
|
|
||||||
};
|
|
||||||
|
|
||||||
switch (format) {
|
|
||||||
case rss:
|
|
||||||
attKeys = attKeysRss;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case atom:
|
|
||||||
attKeys = attKeysAtom;
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
logMsg(1, "Missing article attribute keys for format\n");
|
|
||||||
return 1;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Build a linked list of item structs to pass to itemAction()
|
|
||||||
item->next = prev;
|
|
||||||
prev = item;
|
|
||||||
|
|
||||||
xmlNodePtr itemNode = cur->children;
|
|
||||||
|
|
||||||
// Value within the tag
|
|
||||||
char *itemKey;
|
|
||||||
|
|
||||||
while (itemNode) {
|
|
||||||
itemKey = (char *)xmlNodeListGetString(doc, itemNode->children, 1);
|
|
||||||
|
|
||||||
if (itemKey) {
|
|
||||||
for (unsigned long int i = 0; i < LEN(atts); i++) {
|
|
||||||
if (TAGIS(itemNode, attKeys[i])) {
|
|
||||||
size_t keyLen = strlen(itemKey) + 1;
|
|
||||||
*atts[i] = ecalloc(keyLen, sizeof(char));
|
|
||||||
memcpy(*atts[i], itemKey, keyLen * sizeof(char));
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
xmlFree(itemKey);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Exceptions
|
|
||||||
|
|
||||||
// Atom entry link tag
|
|
||||||
if (format == atom && TAGIS(itemNode, "link")) {
|
|
||||||
xmlChar *link = xmlGetProp(itemNode, (xmlChar *) "href");
|
|
||||||
|
|
||||||
if (!link) {
|
|
||||||
logMsg(1, "Missing Atom entry link\n");
|
|
||||||
xmlFree(link);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t linkLen = strlen((char *) link) + 1;
|
|
||||||
item->link = ecalloc(linkLen, sizeof(char));
|
|
||||||
memcpy(item->link, (char *) link, linkLen * sizeof(char));
|
|
||||||
|
|
||||||
xmlFree(link);
|
|
||||||
}
|
|
||||||
|
|
||||||
itemNode = itemNode->next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cur = cur->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
errno = 0;
|
|
||||||
int stat = mkdir((const char* ) feedName, S_IRWXU);
|
|
||||||
|
|
||||||
if (!stat && errno && errno != EEXIST) {
|
|
||||||
logMsg(1, "Error creating directory for feed.\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
itemAction(prev, feedName);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
readDoc(char *content,
|
|
||||||
const char *feedName,
|
|
||||||
void itemAction(itemStruct *, const char *))
|
|
||||||
{
|
|
||||||
// Initialize the XML document, read it, then free it
|
|
||||||
|
|
||||||
xmlDocPtr doc;
|
|
||||||
|
|
||||||
doc = xmlReadMemory(content, strlen(content), "noname.xml", NULL, 0);
|
|
||||||
if (!doc) {
|
|
||||||
logMsg(1, "XML parser error.\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int stat = parseXml(doc, feedName, itemAction);
|
|
||||||
|
|
||||||
if (stat)
|
|
||||||
logMsg(1, "Skipped feed %s due to errors.\n", feedName);
|
|
||||||
|
|
||||||
xmlFreeDoc(doc);
|
|
||||||
|
|
||||||
return stat;
|
|
||||||
}
|
|
31
xml.h
31
xml.h
@ -1,31 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/.
|
|
||||||
|
|
||||||
© 2021 dogeystamp <dogeystamp@disroot.org>
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef struct itemStruct itemStruct;
|
|
||||||
struct itemStruct {
|
|
||||||
char *title;
|
|
||||||
char *link;
|
|
||||||
char *description;
|
|
||||||
itemStruct *next;
|
|
||||||
};
|
|
||||||
|
|
||||||
enum feedFormat {
|
|
||||||
none,
|
|
||||||
rss,
|
|
||||||
atom
|
|
||||||
};
|
|
||||||
|
|
||||||
void freeItem(itemStruct *item);
|
|
||||||
|
|
||||||
int readDoc(
|
|
||||||
char *content,
|
|
||||||
const char *feedName,
|
|
||||||
void itemAction(itemStruct *, char const *chanTitle));
|
|
Loading…
x
Reference in New Issue
Block a user