Compare commits

..

11 Commits

Author SHA1 Message Date
92dad5b3dc
minrss.c: convert TAGIS from macro to inline func 2022-10-30 13:44:05 -04:00
d41a612d9d
config.def.h: remove tags from example linkStruct 2022-10-29 22:33:22 -04:00
1df7e7cbc7
Implemented JSON output format 2022-10-29 22:29:34 -04:00
0e81a0b5e2
handlers.c: fix Atom links not registering if rel=alternate 2022-10-29 22:28:54 -04:00
418a835857
handlers.c: added enclosure link to output 2022-10-29 20:26:04 -04:00
036206d2be
handlers.c: add comment on openFile() about freeing fileName 2022-10-29 20:17:39 -04:00
9df7ce934c
Bump version 2022-10-29 20:09:52 -04:00
0ebb379a64
Update XML parsing
itemStruct's fields are now array elements. Tag parsing is now done
within handlers.c for more modularity.
2022-10-29 20:05:24 -04:00
73b0cad54f
handlers.c: make itemAction more modular 2022-10-29 20:04:46 -04:00
453e36317f
xml.c: merge into minrss.c 2022-10-27 20:36:51 -04:00
04eaae7230
Change enum names to all-caps 2022-10-24 20:40:18 -04:00
10 changed files with 482 additions and 381 deletions

View File

@ -1,15 +1,20 @@
PREFIX = ~/.local
VERSION = 0.1
VERSION = 0.2
# CC = cc
SRC = minrss.c util.c net.c xml.c
OBJ = $(SRC:.c=.o)
PKG_CONFIG = pkg-config
# Comment out if JSON output support isn't needed
JSONLIBS = `$(PKG_CONFIG) --libs json-c`
JSONINCS = `$(PKG_CONFIG) --cflags json-c`
JSONFLAG = -DJSON
CURL_CONFIG = curl-config
INCS = `$(PKG_CONFIG) --cflags libxml-2.0` `$(CURL_CONFIG) --cflags`
LIBS = `$(PKG_CONFIG) --libs libxml-2.0` `$(CURL_CONFIG) --libs`
SRC = minrss.c util.c net.c handlers.c
OBJ = $(SRC:.c=.o)
# JSONINCS (defined above) expands to nothing when the JSON lines are commented out
INCS = `$(PKG_CONFIG) --cflags libxml-2.0` `$(CURL_CONFIG) --cflags` $(JSONINCS)
LIBS = `$(PKG_CONFIG) --libs libxml-2.0` `$(CURL_CONFIG) --libs` $(JSONLIBS)
WARN = -Wall -Wpedantic -Wextra
CFLAGS = $(INCS) $(LIBS) $(WARN) -DVERSION=\"$(VERSION)\"
CFLAGS = $(INCS) $(LIBS) $(WARN) -DVERSION=\"$(VERSION)\" $(JSONFLAG)
all: config.h minrss

11
README
View File

@ -1,8 +1,10 @@
MinRSS
======
MinRSS is an RSS/Atom feed reader for Linux inspired by suckless.org's
IRC clients ii and sic. Instead of presenting articles as entries
in a menu, it saves them as files in folders.
MinRSS is an RSS/Atom feed reader for Linux inspired by suckless.org's IRC
clients ii and sic. Instead of presenting articles as entries in a menu, it
saves them as files in folders.
These files can either be formatted as HTML, or as JSON to help with scripting.
rss
|--news
@ -16,6 +18,9 @@ Requirements
------------
You need libcurl and libxml2 to compile MinRSS.
json-c is required for JSON output. To disable this feature, comment out the
relevant lines in Makefile.
Installation
------------
Run this command to build MinRSS:

View File

@ -21,7 +21,6 @@ typedef struct {
.url = "https://example.com/rss/feed.rss",
// This will be used as the folder name for the feed.
.feedName = "examplefeed",
.tags = "test example sample"
},
*/
@ -50,5 +49,12 @@ static const int maxRedirs = 10;
// For more information: https://curl.se/libcurl/c/CURLOPT_PROTOCOLS.html
static const int curlProtocols = CURLPROTO_HTTPS | CURLPROTO_HTTP;
// File extension used for each article.
static const char fileExt[] = ".html";
// Formats an article can be saved in.
// OUTPUT_JSON only exists when the program is compiled with JSON defined.
enum outputFormats {
OUTPUT_HTML,
#ifdef JSON
OUTPUT_JSON,
#endif // JSON
};
// When saving, sets the format of the saved file.
static const enum outputFormats outputFormat = OUTPUT_HTML;

239
handlers.c Normal file
View File

@ -0,0 +1,239 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlreader.h>
#ifdef JSON
#include <json-c/json.h>
#endif // JSON
#include "config.h"
#include "util.h"
#include "handlers.h"
// Release one article: every field string, then the struct itself.
// Does not follow item->next; the caller walks the list.
void
freeItem(itemStruct *item)
{
	char **slot = item->fields;
	char **const end = item->fields + FIELD_END;

	// free(NULL) is a no-op, so unset fields need no special casing
	while (slot != end)
		free(*slot++);
	free(item);
}
// Returns 1 when the attribute value prop equals name, 0 otherwise.
static inline int
propIs(xmlChar *prop, char *name)
{
	const xmlChar *wanted = (const xmlChar *) name;
	return xmlStrcmp(prop, wanted) == 0;
}
// Store a heap-allocated copy of str in *field.
//
// field: slot to fill; must hold either NULL or a pointer previously
//        stored by this function (the old value is freed).
// str:   NUL-terminated string to copy. A NULL str (e.g. the text of an
//        empty XML element) leaves *field untouched.
static void
allocField(char **field, char *str)
{
	if (!str)
		return;

	size_t len = strlen(str) + 1;
	char *fieldStr = ecalloc(len, sizeof(char));
	memcpy(fieldStr, str, len * sizeof(char));

	// Copy first, free second: stays correct even if str aliases *field.
	// Freeing the old value avoids a leak when a tag appears more than once.
	free(*field);
	*field = fieldStr;
}
// Copy the string str into the slot of item selected by field.
void
copyField(itemStruct *item, enum fields field, char *str)
{
	char **slot = &item->fields[field];
	allocField(slot, str);
}
// Handle an Atom <link> element.
//
// The href attribute is stored as the article link when rel is absent or
// "alternate", and as the enclosure URL when rel is "enclosure"; any other
// rel value is ignored.
// Returns 1 when the element has no href attribute, 0 otherwise.
int
atomLink(itemStruct *item, xmlNodePtr node)
{
	xmlChar *target = xmlGetProp(node, (xmlChar *) "href");
	xmlChar *relation = xmlGetProp(node, (xmlChar *) "rel");

	if (target == NULL) {
		logMsg(1, "Invalid link tag.\n");
		if (relation != NULL)
			xmlFree(relation);
		return 1;
	}

	// FIELD_END doubles as "store nothing"
	enum fields dest = FIELD_END;
	if (relation == NULL || propIs(relation, "alternate"))
		dest = FIELD_LINK;
	else if (propIs(relation, "enclosure"))
		dest = FIELD_ENCLOSURE_URL;

	if (dest != FIELD_END)
		copyField(item, dest, (char *) target);

	xmlFree(target);
	xmlFree(relation);
	return 0;
}
// Handle an RSS <enclosure> element: store its url attribute as the
// article's enclosure URL.
// Returns 1 when the attribute is missing, 0 otherwise.
int
rssEnclosure(itemStruct *item, xmlNodePtr node)
{
	xmlChar *target = xmlGetProp(node, (xmlChar *) "url");
	int status = 1;

	if (target != NULL) {
		copyField(item, FIELD_ENCLOSURE_URL, (char *) target);
		xmlFree(target);
		status = 0;
	} else {
		logMsg(1, "Invalid enclosure URL.\n");
	}
	return status;
}
// Open [folder]/[fileName][fileExt] in append mode.
//
// Sanitizing the names is the caller's responsibility.
// fileName is freed here, but only on the path that reaches fopen();
// the early NULL returns below leave it untouched.
// Returns the stream from fopen() (which may itself be NULL), or NULL when
// an argument is NULL or fileName is empty.
FILE *
openFile(const char *folder, char *fileName, char *fileExt)
{
	if (!folder) {
		logMsg(1, "NULL folder");
		return NULL;
	}
	if (!fileName) {
		logMsg(1, "NULL file base name");
		return NULL;
	}
	if (!fileExt) {
		logMsg(1, "NULL file extension");
		return NULL;
	}
	if (fileName[0] == '\0') {
		logMsg(1, "Invalid filename.\n");
		return NULL;
	}

	const size_t dirLen = strlen(folder);
	const size_t baseLen = strlen(fileName);
	const size_t suffixLen = strlen(fileExt);

	// folder + path separator + base name + extension + null terminator
	char *path = ecalloc(dirLen + 1 + baseLen + suffixLen + 1, sizeof(char));
	char *cursor = path;

	memcpy(cursor, folder, dirLen * sizeof(char));
	cursor += dirLen;
	*cursor++ = fsep();
	memcpy(cursor, fileName, baseLen * sizeof(char));
	cursor += baseLen;
	memcpy(cursor, fileExt, suffixLen * sizeof(char));
	cursor += suffixLen;
	*cursor = '\0';

	FILE *stream = fopen(path, "a");
	free(path);
	free(fileName);
	return stream;
}
// Write the populated fields of item to f as an HTML fragment:
// title heading, article link, enclosure link, then the description verbatim.
static void
outputHtml(itemStruct *item, FILE *f)
{
	const char *title = item->fields[FIELD_TITLE];
	const char *link = item->fields[FIELD_LINK];
	const char *enclosure = item->fields[FIELD_ENCLOSURE_URL];
	const char *body = item->fields[FIELD_DESCRIPTION];

	if (title != NULL)
		fprintf(f, "<h1>%s</h1><br>\n", title);
	if (link != NULL)
		fprintf(f, "<a href=\"%s\">Link</a><br>\n", link);
	if (enclosure != NULL)
		fprintf(f, "<a href=\"%s\">Enclosure</a><br>\n", enclosure);
	if (body != NULL)
		fprintf(f, "%s", body);
}
#ifdef JSON
// Write the populated fields of item to f as a single JSON object.
// Keys: "title", "link", "enclosure" (an object holding "link") and
// "description"; a key is omitted when its field is unset.
static void
outputJson(itemStruct *item, FILE *f)
{
	char **fields = item->fields;
	json_object *doc = json_object_new_object();

	if (fields[FIELD_TITLE] != NULL) {
		json_object *title = json_object_new_string(fields[FIELD_TITLE]);
		json_object_object_add(doc, "title", title);
	}

	if (fields[FIELD_LINK] != NULL) {
		json_object *link = json_object_new_string(fields[FIELD_LINK]);
		json_object_object_add(doc, "link", link);
	}

	if (fields[FIELD_ENCLOSURE_URL] != NULL) {
		json_object *enclosure = json_object_new_object();
		json_object *url = json_object_new_string(fields[FIELD_ENCLOSURE_URL]);
		json_object_object_add(enclosure, "link", url);
		json_object_object_add(doc, "enclosure", enclosure);
	}

	if (fields[FIELD_DESCRIPTION] != NULL) {
		json_object *body = json_object_new_string(fields[FIELD_DESCRIPTION]);
		json_object_object_add(doc, "description", body);
	}

	const char *text = json_object_to_json_string_ext(doc, 0);
	fprintf(f, "%s", text);

	// Drop our reference to the root object now that it has been written
	json_object_put(doc);
}
#endif // JSON
// Save every article of a linked list into folder, then free the list.
//
// item:   head of the list (may be NULL); every node is freed before returning.
// folder: directory the article files are written to.
//
// Each article goes to [folder]/[sanitized title][extension] in the format
// selected by outputFormat. A file that already has content is left alone.
// Articles whose file cannot be opened are skipped.
void
itemAction(itemStruct *item, const char *folder)
{
	if (!item)
		return;

	// The format is a compile-time setting, so resolve it once for the list
	char *fileExt = NULL;
	void (*outputFunction)(itemStruct *, FILE *) = NULL;
	switch (outputFormat) {
	case OUTPUT_HTML:
		fileExt = ".html";
		outputFunction = &outputHtml;
		break;
#ifdef JSON
	case OUTPUT_JSON:
		fileExt = ".json";
		outputFunction = &outputJson;
		break;
#endif //JSON
	default:
		// outputFunction stays NULL: nothing is written, the list is still freed
		logMsg(0, "Output format is invalid.");
		break;
	}

	unsigned long long int newItems = 0;
	itemStruct *cur = item;
	while (cur) {
		itemStruct *next = cur->next;

		if (outputFunction) {
			// openFile() returns NULL for an empty name or when fopen() fails
			FILE *itemFile = openFile(folder, san(cur->fields[FIELD_TITLE]), fileExt);
			if (itemFile) {
				// Do not overwrite files. The initial position of an
				// append-mode stream is implementation-defined, so seek to
				// the end before asking for the current size.
				if (!fseek(itemFile, 0, SEEK_END) && !ftell(itemFile)) {
					outputFunction(cur, itemFile);
					newItems++;
				}
				fclose(itemFile);
			} else {
				logMsg(1, "Skipping article: could not open its file.\n");
			}
		}

		freeItem(cur);
		cur = next;
	}

	if (newItems)
		logMsg(2, "%s : %llu new articles\n", folder, newItems);
}
// Executed after a download finishes: log the outcome for url.
// A responseCode of 0 is reported as the site being unreachable.
void
finish(char *url, long responseCode)
{
	switch (responseCode) {
	case 200:
		logMsg(4, "Finished downloading %s\n", url);
		break;
	case 0:
		logMsg(1, "Can not reach %s: ensure the protocol is enabled and the site is accessible.\n", url);
		break;
	default:
		logMsg(1, "HTTP %ld for %s\n", responseCode, url);
		break;
	}
}

36
handlers.h Normal file
View File

@ -0,0 +1,36 @@
// Feed dialect of a parsed document.
// NONE means the document was not recognised as either dialect.
enum feedFormat {
NONE,
RSS,
ATOM,
};
// Indices into itemStruct.fields (the text fields of an article).
// FIELD_END is not a field: it is the number of slots in that array.
enum fields {
FIELD_TITLE,
FIELD_LINK,
FIELD_DESCRIPTION,
FIELD_ENCLOSURE_URL,
FIELD_END
};
// Indices into itemStruct.numFields (the numeric fields of an article).
// NUM_END is not a field: it is the number of slots in that array.
enum numFields {
// currently unimplemented
NUM_ENCLOSURE_SIZE,
NUM_END
};
// One parsed article; articles are chained into a singly linked list.
typedef struct itemStruct itemStruct;
struct itemStruct {
// Text fields indexed by enum fields; a slot is NULL when unset
char *fields[FIELD_END];
// Numeric fields indexed by enum numFields
int numFields[NUM_END];
// Next article in the list, or NULL at the end
itemStruct *next;
};
// NOTE(review): xmlNodePtr is not declared by this header; includers are
// expected to include the libxml2 tree header first — confirm.

// Store a heap-allocated copy of the NUL-terminated string str in item->fields[field].
void copyField(itemStruct *item, enum fields field, char *str);
// Free every set text field of item, then item itself (item->next is not followed).
void freeItem(itemStruct *item);
// Write each article of the list starting at item into folder, freeing the list as it goes.
void itemAction(itemStruct *item, const char *folder);
// Log the outcome of a finished download of url based on responseCode.
void finish(char *url, long responseCode);
// Store the "url" attribute of an RSS <enclosure> node; returns 1 if it is missing, else 0.
int rssEnclosure(itemStruct *item, xmlNodePtr node);
// Store the "href" attribute of an Atom <link> node according to its "rel"; returns 1 if href is missing, else 0.
int atomLink(itemStruct *item, xmlNodePtr node);

251
minrss.c
View File

@ -13,90 +13,191 @@ You should have received a copy of the GNU General Public License along with thi
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlreader.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include "config.h"
#include "util.h"
#include "net.h"
#include "xml.h"
#include "handlers.h"
#include "config.h"
void
itemAction(itemStruct *item, const char *folder)
// Returns 1 when the name of node equals str, 0 otherwise.
static inline int
tagIs(xmlNodePtr node, char *str)
{
	const xmlChar *wanted = (const xmlChar *) str;
	return xmlStrcmp(node->name, wanted) == 0;
}
void
finish(char *url, long responseCode)
// Parse the XML in a single document.
//
// doc:        parsed feed document.
// feedName:   name of the directory the feed is saved to; must be non-empty.
// itemAction: callback that receives the linked list of parsed articles
//             (possibly NULL when the feed has none) and takes ownership of it.
//
// Returns 0 on success. Returns 1 when the feed name is missing, the document
// is not an RSS or Atom feed, or the feed directory cannot be created; in
// those cases itemAction is not called.
static int
parseXml(xmlDocPtr doc,
		const char *feedName,
		void itemAction(itemStruct *, const char *))
{
	if (!feedName || !feedName[0]) {
		logMsg(1, "Missing feed name, please set one.\n");
		return 1;
	}

	xmlNodePtr rootNode = xmlDocGetRootElement(doc);
	if (!rootNode) {
		logMsg(1, "Empty document for feed.\n");
		return 1;
	}

	enum feedFormat format = NONE;
	if (tagIs(rootNode, "rss")) {
		format = RSS;
	} else if (tagIs(rootNode, "feed")) {
		// A <feed> root that declares no namespace has ns == NULL
		if (rootNode->ns
				&& !xmlStrcmp(rootNode->ns->href, (const xmlChar *) "http://www.w3.org/2005/Atom"))
			format = ATOM;
	}

	if (format == NONE) {
		logMsg(1, "XML document is not an RSS or Atom feed.\n");
		return 1;
	}

	// Pointer to the first child of the root XML node
	xmlNodePtr cur = rootNode->children;
	// Name of the tag that holds one article
	char *articleTag;

	if (format == RSS) {
		// RSS articles live inside <channel>, Atom entries directly in <feed>
		while (cur && !tagIs(cur, "channel"))
			cur = cur->next;
		if (!cur) {
			logMsg(1, "Invalid RSS syntax.\n");
			return 1;
		}
		cur = cur->children;
		articleTag = "item";
	} else {
		articleTag = "entry";
	}

	// Head of the linked list handed to itemAction(); each new article is
	// pushed in front of the previous one
	itemStruct *prev = NULL;

	// Loop over articles (skipping non-article tags)
	for (; cur; cur = cur->next) {
		if (!tagIs(cur, articleTag))
			continue;

		itemStruct *item = ecalloc(1, sizeof(itemStruct));
		item->next = prev;
		prev = item;

		for (xmlNodePtr itemNode = cur->children; itemNode; itemNode = itemNode->next) {
			// Text within the tag; NULL for an empty element
			char *itemKey = (char *)xmlNodeListGetString(doc, itemNode->children, 1);
			// Field filled from the tag's text, or FIELD_END for none
			enum fields textField = FIELD_END;

			if (format == RSS) {
				if (tagIs(itemNode, "link"))
					textField = FIELD_LINK;
				else if (tagIs(itemNode, "description"))
					textField = FIELD_DESCRIPTION;
				else if (tagIs(itemNode, "title"))
					textField = FIELD_TITLE;
				else if (tagIs(itemNode, "enclosure"))
					rssEnclosure(item, itemNode);
			} else {
				// Atom links carry their value in attributes, not in text
				if (tagIs(itemNode, "link"))
					atomLink(item, itemNode);
				else if (tagIs(itemNode, "content"))
					textField = FIELD_DESCRIPTION;
				else if (tagIs(itemNode, "title"))
					textField = FIELD_TITLE;
			}

			// copyField() needs a real string; skip empty elements
			if (textField != FIELD_END && itemKey)
				copyField(item, textField, itemKey);

			xmlFree(itemKey);
		}
	}

	// mkdir() returns non-zero on failure; an already existing directory is fine
	if (mkdir(feedName, S_IRWXU) && errno != EEXIST) {
		logMsg(1, "Error creating directory for feed.\n");
		// itemAction() will not run, so release the list here
		while (prev) {
			itemStruct *next = prev->next;
			freeItem(prev);
			prev = next;
		}
		return 1;
	}

	itemAction(prev, feedName);
	return 0;
}
// Initialize the XML document from content, parse it, then free it.
//
// content must be NUL-terminated: its length is taken with strlen().
// Returns 1 when the XML parser rejects content, otherwise the status of
// parseXml() (non-zero means the feed was skipped).
int
readDoc(char *content,
		const char *feedName,
		void itemAction(itemStruct *, const char *))
{
	xmlDocPtr doc = xmlReadMemory(content, strlen(content), "noname.xml", NULL, 0);
	if (doc == NULL) {
		logMsg(1, "XML parser error.\n");
		return 1;
	}

	const int result = parseXml(doc, feedName, itemAction);
	if (result != 0)
		logMsg(1, "Skipped feed %s due to errors.\n", feedName);

	xmlFreeDoc(doc);
	return result;
}
int

4
util.c
View File

@ -61,12 +61,10 @@ erealloc(void *p, size_t size)
}
char *
san(char *str, int rep)
san(char *str)
{
if (!str)
return "";
if (!rep)
return str;
unsigned long long int len = strlen(str);
unsigned long long int offset = 0;

2
util.h
View File

@ -14,5 +14,5 @@ You should have received a copy of the GNU General Public License along with thi
void logMsg(int argc, char *msg, ...);
void *ecalloc(size_t nmemb, size_t size);
void *erealloc(void *p, size_t nmemb);
char *san(char *str, int rep);
char *san(char *str);
char fsep();

258
xml.c
View File

@ -1,258 +0,0 @@
/*
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/.
© 2021 dogeystamp <dogeystamp@disroot.org>
*/
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlreader.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <string.h>
#include "util.h"
#include "xml.h"
// Non-zero when the name of XML node X equals the string Y.
// Arguments are parenthesised so that expressions can be passed safely.
#define TAGIS(X, Y) (!xmlStrcmp((X)->name, (const xmlChar *) (Y)))
void
freeItem(itemStruct *item)
{
	// Free the memory used by an article.
	// free(NULL) is a no-op, so unset strings need no special casing.
	free(item->title);
	free(item->link);
	free(item->description);
	free(item);
}
// Parse the XML in a single document.
//
// doc:        parsed feed document.
// feedName:   name of the directory the feed is saved to; must be non-empty.
// itemAction: callback that receives the linked list of parsed articles
//             (possibly NULL when the feed has none) and takes ownership of it.
//
// Returns 0 on success. Returns 1 when the feed name is missing, the document
// is not an RSS or Atom feed, an Atom entry link has no href, or the feed
// directory cannot be created; in those cases itemAction is not called and
// any articles parsed so far are freed.
static int
parseXml(xmlDocPtr doc,
		const char *feedName,
		void itemAction(itemStruct *, const char *))
{
	if (!feedName || !feedName[0]) {
		logMsg(1, "Missing feed name, please set one.\n");
		return 1;
	}

	xmlNodePtr rootNode = xmlDocGetRootElement(doc);
	if (!rootNode) {
		logMsg(1, "Empty document for feed.\n");
		return 1;
	}

	enum feedFormat format = none;
	if (TAGIS(rootNode, "rss")) {
		format = rss;
	} else if (TAGIS(rootNode, "feed")) {
		// A <feed> root that declares no namespace has ns == NULL
		if (rootNode->ns
				&& !xmlStrcmp(rootNode->ns->href, (const xmlChar *) "http://www.w3.org/2005/Atom"))
			format = atom;
	}

	if (format == none) {
		logMsg(1, "XML document is not an RSS or Atom feed.\n");
		return 1;
	}

	// Pointer to the first child of the root XML node
	xmlNodePtr cur = rootNode->children;

	if (format == rss) {
		// RSS articles live inside <channel>, Atom entries directly in <feed>
		while (cur && !TAGIS(cur, "channel"))
			cur = cur->next;
		if (!cur) {
			logMsg(1, "Invalid RSS syntax.\n");
			return 1;
		}
		cur = cur->children;
	}

	// Tag names whose text fills title, link and description, in that order
	char *attKeysRss[] = {
		"title",
		"link",
		"description",
	};
	char *attKeysAtom[] = {
		"title",
		// link has special treatment because its value is in href not within the tag
		"",
		"content",
	};
	char **attKeys = (format == rss) ? attKeysRss : attKeysAtom;
	char *articleTag = (format == rss) ? "item" : "entry";

	// Head of the linked list handed to itemAction()
	itemStruct *prev = NULL;
	int failed = 0;

	// Loop over articles (skipping non-article tags)
	for (; cur && !failed; cur = cur->next) {
		if (!TAGIS(cur, articleTag))
			continue;

		itemStruct *item = ecalloc(1, sizeof(itemStruct));
		// Struct variables to map attributes to, same order as attKeys
		char **atts[] = {
			&item->title,
			&item->link,
			&item->description,
		};

		item->next = prev;
		prev = item;

		for (xmlNodePtr itemNode = cur->children; itemNode; itemNode = itemNode->next) {
			// Value within the tag; NULL for an empty element
			char *itemKey = (char *)xmlNodeListGetString(doc, itemNode->children, 1);
			if (itemKey) {
				for (unsigned long int i = 0; i < LEN(atts); i++) {
					if (TAGIS(itemNode, attKeys[i])) {
						size_t keyLen = strlen(itemKey) + 1;
						// Drop the value of an earlier identical tag
						free(*atts[i]);
						*atts[i] = ecalloc(keyLen, sizeof(char));
						memcpy(*atts[i], itemKey, keyLen * sizeof(char));
						break;
					}
				}
				xmlFree(itemKey);
			}

			// Exceptions
			// Atom entry link tag
			if (format == atom && TAGIS(itemNode, "link")) {
				xmlChar *link = xmlGetProp(itemNode, (xmlChar *) "href");
				if (!link) {
					logMsg(1, "Missing Atom entry link\n");
					failed = 1;
					break;
				}
				size_t linkLen = strlen((char *) link) + 1;
				free(item->link);
				item->link = ecalloc(linkLen, sizeof(char));
				memcpy(item->link, (char *) link, linkLen * sizeof(char));
				xmlFree(link);
			}
		}
	}

	// mkdir() returns non-zero on failure; an already existing directory is fine
	if (!failed && mkdir(feedName, S_IRWXU) && errno != EEXIST) {
		logMsg(1, "Error creating directory for feed.\n");
		failed = 1;
	}

	if (failed) {
		// itemAction() will not run, so release the list here
		while (prev) {
			itemStruct *next = prev->next;
			freeItem(prev);
			prev = next;
		}
		return 1;
	}

	itemAction(prev, feedName);
	return 0;
}
// Initialize the XML document from content, parse it, then free it.
//
// content must be NUL-terminated: its length is taken with strlen().
// Returns 1 when the XML parser rejects content, otherwise the status of
// parseXml() (non-zero means the feed was skipped).
int
readDoc(char *content,
		const char *feedName,
		void itemAction(itemStruct *, const char *))
{
	xmlDocPtr doc = xmlReadMemory(content, strlen(content), "noname.xml", NULL, 0);
	if (doc == NULL) {
		logMsg(1, "XML parser error.\n");
		return 1;
	}

	const int result = parseXml(doc, feedName, itemAction);
	if (result != 0)
		logMsg(1, "Skipped feed %s due to errors.\n", feedName);

	xmlFreeDoc(doc);
	return result;
}

31
xml.h
View File

@ -1,31 +0,0 @@
/*
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/.
© 2021 dogeystamp <dogeystamp@disroot.org>
*/
// One parsed article; articles are chained into a singly linked list.
typedef struct itemStruct itemStruct;
struct itemStruct {
// Heap-allocated strings owned by the item; NULL when the feed did not provide them
char *title;
char *link;
char *description;
// Next article in the list, or NULL at the end
itemStruct *next;
};
// Feed dialect of a parsed document.
// none means the document was not recognised as either dialect.
enum feedFormat {
none,
rss,
atom
};
// Free the strings of an article, then the article itself (item->next is not followed).
void freeItem(itemStruct *item);
// Parse the NUL-terminated XML text in content and pass the resulting list of
// articles, together with feedName, to itemAction.
// Returns 0 on success, non-zero when the feed was skipped.
int readDoc(
char *content,
const char *feedName,
void itemAction(itemStruct *, char const *chanTitle));