2022-11-05 16:45:03 -04:00
|
|
|
/*
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/.
|
|
|
|
|
|
|
|
© 2022 dogeystamp <dogeystamp@disroot.org>
|
|
|
|
*/
|
|
|
|
|
2022-10-27 21:17:24 -04:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
2022-10-29 17:48:36 -04:00
|
|
|
#include <libxml/parser.h>
|
|
|
|
#include <libxml/tree.h>
|
|
|
|
#include <libxml/xmlreader.h>
|
2022-10-29 22:29:34 -04:00
|
|
|
#ifdef JSON
|
|
|
|
#include <json-c/json.h>
|
|
|
|
#endif // JSON
|
2022-10-27 21:17:24 -04:00
|
|
|
|
|
|
|
#include "config.h"
|
|
|
|
#include "util.h"
|
2022-10-29 17:48:36 -04:00
|
|
|
#include "handlers.h"
|
2022-10-27 21:17:24 -04:00
|
|
|
|
2022-10-29 17:48:36 -04:00
|
|
|
void
|
|
|
|
freeItem(itemStruct *item)
|
|
|
|
{
|
|
|
|
for (int i = 0; i < FIELD_END; i++) {
|
|
|
|
if (item->fields[i])
|
|
|
|
free(item->fields[i]);
|
|
|
|
}
|
|
|
|
free(item);
|
|
|
|
}
|
2022-10-27 21:17:24 -04:00
|
|
|
|
2022-10-29 17:48:36 -04:00
|
|
|
static inline int
|
|
|
|
propIs(xmlChar *prop, char *name)
|
|
|
|
{
|
|
|
|
return !xmlStrcmp(prop, (const xmlChar *) name);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Duplicate `str` (terminator included) into a buffer obtained from ecalloc()
// and store that buffer's address in `*field`.
// Whatever `*field` pointed to before is overwritten, not freed.
static void
allocField(char **field, char *str)
{
	size_t bytes = strlen(str) + 1;
	char *copy = ecalloc(bytes, sizeof(char));

	// memcpy() returns its destination, i.e. the new buffer.
	*field = memcpy(copy, str, bytes * sizeof(char));
}
|
2022-10-27 21:17:24 -04:00
|
|
|
|
|
|
|
void
|
2022-10-29 17:48:36 -04:00
|
|
|
copyField(itemStruct *item, enum fields field, char *str)
|
2022-10-27 21:17:24 -04:00
|
|
|
{
|
2023-01-15 21:31:52 -05:00
|
|
|
if (!str) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Attempted to assign a null pointer to a field!\n");
|
2023-01-15 21:31:52 -05:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2022-10-29 17:48:36 -04:00
|
|
|
allocField(&item->fields[field], str);
|
|
|
|
}
|
2022-10-27 21:17:24 -04:00
|
|
|
|
2022-10-29 17:48:36 -04:00
|
|
|
int
|
|
|
|
atomLink(itemStruct *item, xmlNodePtr node)
|
|
|
|
{
|
|
|
|
xmlChar *href = xmlGetProp(node, (xmlChar *) "href");
|
|
|
|
xmlChar *rel = xmlGetProp(node, (xmlChar *) "rel");
|
|
|
|
|
|
|
|
if (!href) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Invalid link tag.\n");
|
2022-10-29 17:48:36 -04:00
|
|
|
if (rel)
|
|
|
|
xmlFree(rel);
|
|
|
|
return 1;
|
|
|
|
}
|
2022-10-27 21:17:24 -04:00
|
|
|
|
2022-10-29 22:28:54 -04:00
|
|
|
if (!rel || propIs(rel, "alternate")) {
|
2022-10-29 17:48:36 -04:00
|
|
|
copyField(item, FIELD_LINK, (char *)href);
|
|
|
|
} else if (propIs(rel, "enclosure")) {
|
|
|
|
copyField(item, FIELD_ENCLOSURE_URL, (char *)href);
|
2023-03-22 20:14:27 -04:00
|
|
|
|
|
|
|
xmlChar *enclosure_type = xmlGetProp(node, (xmlChar *) "type");
|
|
|
|
copyField(item, FIELD_ENCLOSURE_TYPE, (char *)enclosure_type);
|
|
|
|
xmlFree(enclosure_type);
|
2022-10-27 21:17:24 -04:00
|
|
|
}
|
|
|
|
|
2022-10-29 17:48:36 -04:00
|
|
|
xmlFree(href);
|
|
|
|
xmlFree(rel);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
rssEnclosure(itemStruct *item, xmlNodePtr node)
|
|
|
|
{
|
|
|
|
xmlChar *href = xmlGetProp(node, (xmlChar *) "url");
|
|
|
|
if (!href) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Invalid enclosure URL.\n");
|
2022-10-29 17:48:36 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
copyField(item, FIELD_ENCLOSURE_URL, (char *)href);
|
|
|
|
xmlFree(href);
|
2023-03-22 20:14:27 -04:00
|
|
|
|
|
|
|
xmlChar *enclosure_type = xmlGetProp(node, (xmlChar *) "type");
|
|
|
|
copyField(item, FIELD_ENCLOSURE_TYPE, (char *)enclosure_type);
|
|
|
|
xmlFree(enclosure_type);
|
2022-10-29 17:48:36 -04:00
|
|
|
|
|
|
|
return 0;
|
2022-10-27 21:17:24 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
FILE *
|
|
|
|
openFile(const char *folder, char *fileName, char *fileExt)
|
|
|
|
{
|
|
|
|
// [folder]/[fileName][fileExt]
|
2023-02-06 20:13:38 -05:00
|
|
|
// caller's responsibility to sanitize names and free fileName
|
2022-10-27 21:17:24 -04:00
|
|
|
|
|
|
|
if (!folder) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "NULL folder");
|
2022-10-27 21:17:24 -04:00
|
|
|
return NULL;
|
|
|
|
} else if (!fileName) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "NULL file base name");
|
2022-10-27 21:17:24 -04:00
|
|
|
return NULL;
|
|
|
|
} else if (!fileExt) {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "NULL file extension");
|
2022-10-27 21:17:24 -04:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t folderLen = strlen(folder);
|
|
|
|
size_t extLen = strlen(fileExt);
|
|
|
|
size_t fileNameLen = strlen(fileName);
|
|
|
|
|
|
|
|
// +1 for null terminator and +1 for path separator
|
|
|
|
size_t pathLen = folderLen + 1 + fileNameLen + extLen + 1;
|
|
|
|
|
|
|
|
char *filePath;
|
|
|
|
|
|
|
|
if (fileName[0])
|
|
|
|
filePath = ecalloc(pathLen, sizeof(char));
|
|
|
|
else {
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Invalid filename.\n");
|
2022-10-27 21:17:24 -04:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(filePath, folder, folderLen * sizeof(char));
|
|
|
|
|
|
|
|
filePath[folderLen] = fsep();
|
|
|
|
filePath[pathLen - 1] = '\0';
|
|
|
|
|
|
|
|
memcpy(filePath + folderLen + 1, fileName, fileNameLen * sizeof(char));
|
|
|
|
memcpy(filePath + pathLen - extLen - 1, fileExt, extLen * sizeof(char));
|
|
|
|
|
|
|
|
FILE *itemFile = fopen(filePath, "a");
|
|
|
|
free (filePath);
|
|
|
|
|
|
|
|
return itemFile;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2023-06-16 15:24:51 -04:00
|
|
|
outputHtml(itemStruct *item, FILE *f, const char *folder)
|
2022-10-27 21:17:24 -04:00
|
|
|
{
|
2022-10-29 20:26:04 -04:00
|
|
|
if (item->fields[FIELD_TITLE])
|
|
|
|
fprintf(f, "<h1>%s</h1><br>\n", item->fields[FIELD_TITLE]);
|
2023-06-16 15:24:51 -04:00
|
|
|
|
|
|
|
fprintf(f, "From feed <b>%s</b><br>\n", folder);
|
|
|
|
|
2022-10-29 20:26:04 -04:00
|
|
|
if (item->fields[FIELD_LINK])
|
|
|
|
fprintf(f, "<a href=\"%s\">Link</a><br>\n", item->fields[FIELD_LINK]);
|
|
|
|
if (item->fields[FIELD_ENCLOSURE_URL])
|
|
|
|
fprintf(f, "<a href=\"%s\">Enclosure</a><br>\n", item->fields[FIELD_ENCLOSURE_URL]);
|
2023-03-22 20:14:27 -04:00
|
|
|
if (item->fields[FIELD_ENCLOSURE_TYPE])
|
|
|
|
fprintf(f, "Enclosure type: %s\n", item->fields[FIELD_ENCLOSURE_TYPE]);
|
2022-10-29 20:26:04 -04:00
|
|
|
if (item->fields[FIELD_DESCRIPTION])
|
|
|
|
fprintf(f, "%s", item->fields[FIELD_DESCRIPTION]);
|
2022-10-27 21:17:24 -04:00
|
|
|
}
|
|
|
|
|
2022-10-29 22:29:34 -04:00
|
|
|
#ifdef JSON
|
|
|
|
static void
|
2023-06-16 15:24:51 -04:00
|
|
|
outputJson(itemStruct *item, FILE *f, const char *folder)
|
2022-10-29 22:29:34 -04:00
|
|
|
{
|
|
|
|
json_object *root = json_object_new_object();
|
|
|
|
|
2023-06-16 15:24:51 -04:00
|
|
|
json_object_object_add(root, "feedname",
|
|
|
|
json_object_new_string(folder));
|
|
|
|
|
2022-10-29 22:29:34 -04:00
|
|
|
if (item->fields[FIELD_TITLE])
|
|
|
|
json_object_object_add(root, "title",
|
|
|
|
json_object_new_string(item->fields[FIELD_TITLE]));
|
|
|
|
|
|
|
|
if (item->fields[FIELD_LINK])
|
|
|
|
json_object_object_add(root, "link",
|
|
|
|
json_object_new_string(item->fields[FIELD_LINK]));
|
|
|
|
|
|
|
|
if (item->fields[FIELD_ENCLOSURE_URL]) {
|
|
|
|
json_object *enclosure = json_object_new_object();
|
|
|
|
json_object_object_add(enclosure, "link",
|
|
|
|
json_object_new_string(item->fields[FIELD_ENCLOSURE_URL]));
|
2023-03-22 20:14:27 -04:00
|
|
|
if (item->fields[FIELD_ENCLOSURE_TYPE]) {
|
|
|
|
json_object_object_add(enclosure, "type",
|
|
|
|
json_object_new_string(item->fields[FIELD_ENCLOSURE_TYPE]));
|
|
|
|
}
|
2022-10-29 22:29:34 -04:00
|
|
|
json_object_object_add(root, "enclosure", enclosure);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (item->fields[FIELD_DESCRIPTION])
|
|
|
|
json_object_object_add(root, "description",
|
|
|
|
json_object_new_string(item->fields[FIELD_DESCRIPTION]));
|
|
|
|
|
|
|
|
fprintf(f, "%s", json_object_to_json_string_ext(root, 0));
|
|
|
|
json_object_put(root);
|
|
|
|
}
|
|
|
|
#endif // JSON
|
|
|
|
|
2023-02-06 20:13:38 -05:00
|
|
|
int
|
|
|
|
processItem(itemStruct *item, const char *folder)
|
|
|
|
{
|
|
|
|
// Returns 1 if the article is new, 0 if not, -1 for error.
|
|
|
|
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
char fileExt[10];
|
2023-06-16 15:24:51 -04:00
|
|
|
void (*outputFunction)(itemStruct *, FILE *, const char *);
|
2023-02-06 20:13:38 -05:00
|
|
|
|
|
|
|
switch (outputFormat) {
|
|
|
|
case OUTPUT_HTML:
|
|
|
|
memcpy(fileExt, ".html", 6);
|
|
|
|
outputFunction = &outputHtml;
|
|
|
|
break;
|
|
|
|
#ifdef JSON
|
|
|
|
case OUTPUT_JSON:
|
|
|
|
memcpy(fileExt, ".json", 6);
|
|
|
|
outputFunction = &outputJson;
|
|
|
|
break;
|
|
|
|
#endif //JSON
|
|
|
|
|
|
|
|
default:
|
|
|
|
logMsg(LOG_FATAL, "Output format is invalid.\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
char *basename = san(item->fields[FIELD_TITLE]);
|
|
|
|
|
|
|
|
FILE *itemFile = openFile(folder, basename, fileExt);
|
|
|
|
|
|
|
|
if (!itemFile) {
|
|
|
|
logMsg(LOG_ERROR, "Could not open file '%s/%s/%s'.\n",
|
|
|
|
folder,
|
|
|
|
basename,
|
|
|
|
fileExt
|
|
|
|
);
|
|
|
|
|
|
|
|
ret = -1;
|
|
|
|
goto cleanup_basename;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Do not overwrite files
|
|
|
|
if (!ftell(itemFile)) {
|
2023-06-16 15:24:51 -04:00
|
|
|
outputFunction(item, itemFile, folder);
|
2023-02-06 20:13:38 -05:00
|
|
|
ret = 1;
|
|
|
|
if (summaryFormat == SUMMARY_FILES)
|
|
|
|
logMsg(LOG_OUTPUT, "%s%c%s%s\n", folder, fsep(), basename, fileExt);
|
|
|
|
}
|
|
|
|
|
|
|
|
fclose(itemFile);
|
|
|
|
cleanup_basename:
|
|
|
|
free(basename);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2022-10-27 21:17:24 -04:00
|
|
|
void
|
|
|
|
itemAction(itemStruct *item, const char *folder)
|
|
|
|
{
|
|
|
|
// Receives a linked list of articles to process.
|
|
|
|
|
|
|
|
itemStruct *cur = item;
|
|
|
|
itemStruct *prev;
|
|
|
|
|
|
|
|
unsigned long long int newItems = 0;
|
|
|
|
|
|
|
|
while (cur) {
|
|
|
|
prev = cur;
|
2023-02-06 20:13:38 -05:00
|
|
|
int res = processItem(cur, folder);
|
|
|
|
if (res == 1)
|
2022-10-27 21:17:24 -04:00
|
|
|
newItems++;
|
|
|
|
cur = cur->next;
|
|
|
|
freeItem(prev);
|
|
|
|
}
|
|
|
|
|
2023-02-06 20:13:38 -05:00
|
|
|
switch (summaryFormat) {
|
|
|
|
case SUMMARY_HUMAN_READABLE:
|
|
|
|
if (newItems)
|
|
|
|
logMsg(LOG_OUTPUT, "%s : %d new articles\n", folder, newItems);
|
|
|
|
break;
|
|
|
|
case SUMMARY_FILES:
|
|
|
|
// print output after saving each file
|
|
|
|
break;
|
|
|
|
}
|
2022-10-27 21:17:24 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
finish(char *url, long responseCode)
|
|
|
|
{
|
|
|
|
// Executed after a download finishes
|
|
|
|
|
|
|
|
if (responseCode == 200)
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_VERBOSE, "Finished downloading %s\n", url);
|
2022-10-27 21:17:24 -04:00
|
|
|
else if (!responseCode)
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "Can not reach %s: ensure the protocol is enabled and the site is accessible.\n", url);
|
2022-10-27 21:17:24 -04:00
|
|
|
else
|
2023-02-04 10:46:22 -05:00
|
|
|
logMsg(LOG_ERROR, "HTTP %ld for %s\n", responseCode, url);
|
2022-10-27 21:17:24 -04:00
|
|
|
}
|