ref: 8fdbac99c8c167acc77ca43661a50e9360c905f8
author: sirjofri <sirjofri@sirjofri.de>
date: Mon Jun 8 17:42:51 EDT 2020
adds first working program. Only works with rss feeds (no atom!)
--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,2 @@
+*.[0125678qv]
+[0125678qv].out
--- /dev/null
+++ b/date.h
@@ -1,0 +1,116 @@
+/*
+ * Map an English three-letter month abbreviation ("Jan" .. "Dec") to
+ * its zero-based month number.  The comparison is case-sensitive.
+ * A string that matches no month yields 0, the same value as "Jan".
+ */
+int
+str2mon(char *s)
+{
+	static char *names[] = {
+		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+	};
+	int m;
+
+	for(m = 0; m < 12; m++)
+		if(strcmp(s, names[m]) == 0)
+			return m;
+	return 0;
+}
+
+/*
+ * Map an English three-letter weekday abbreviation ("Sun" .. "Sat") to
+ * its number, counting from Sunday = 0.  The comparison is
+ * case-sensitive.  A string that matches no weekday yields 0, the same
+ * value as "Sun".
+ */
+int
+str2wday(char *s)
+{
+	static char *names[] = {
+		"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat",
+	};
+	int d;
+
+	for(d = 0; d < 7; d++)
+		if(strcmp(s, names[d]) == 0)
+			return d;
+	return 0;
+}
+
+/*
+ * Day of the year for a calendar date, counting January 1st as 1.
+ *
+ *	day	day of the month, starting at 1
+ *	month	months since January, 0-11 (the value str2mon returns)
+ *	year	years since 1900, which is what parsedate passes in
+ *
+ * The arguments are not validated: a month below 0 counts as January
+ * and a month above 11 adds the length of the whole year.
+ */
+int
+doty(int day, int month, int year)
+{
+	static int mdays[] = {
+		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+	};
+	int n, i, y;
+
+	/* the Gregorian leap rule has to be applied to the full year */
+	y = year + 1900;
+
+	n = 0;
+	for(i = 0; i < month && i < 12; i++){
+		n += mdays[i];
+		/* February gains a day in leap years */
+		if(i == 1 && y%4 == 0 && (y%100 != 0 || y%400 == 0))
+			n++;
+	}
+
+	return n + day;
+}
+
+/*
+ * Convert an RSS pubDate such as "Mon, 08 Jun 2020 17:42:51 +0200"
+ * into seconds since the epoch, in UTC.
+ *
+ * The string is split on commas, blanks and colons and must yield the
+ * eight fields weekday, day, month name, year, hour, minute, second
+ * and zone offset.  The offset is read with atoi as [+-]hhmm, so a
+ * non-numeric zone name counts as offset 0.
+ *
+ * A nil string, one that does not fit the 64-byte scratch buffer, or
+ * one with fewer than eight fields is fatal (sysfatal).
+ */
+long
+parsedate(char *s)
+{
+	Tm ret;
+	char input[64];
+	char *args[8];
+	int n, tz;
+	long off;
+
+	if(s == nil)
+		sysfatal("error parsing pubDate: no date given");
+	/* getfields edits its input, so split a copy; never overrun it */
+	if(strlen(s) >= sizeof input)
+		sysfatal("error parsing pubDate: %s", s);
+	strcpy(input, s);
+
+	n = getfields(input, args, 8, 1, ", :");
+	if(n < 8)
+		sysfatal("error parsing pubDate: %s", s);
+
+	/*
+	 * Start from a fully defined Tm marked as GMT so tm2sec neither
+	 * reads uninitialised fields nor applies the local time zone;
+	 * the feed's own offset is applied by hand below.
+	 */
+	memset(&ret, 0, sizeof ret);
+	strcpy(ret.zone, "GMT");
+
+	ret.wday = str2wday(args[0]);
+	ret.mday = atoi(args[1]);
+	ret.mon = str2mon(args[2]);
+	ret.year = atoi(args[3]) - 1900;
+	ret.hour = atoi(args[4]);
+	ret.min = atoi(args[5]);
+	ret.sec = atoi(args[6]);
+	/* doty counts from 1, Tm.yday counts from 0 */
+	ret.yday = doty(ret.mday, ret.mon, ret.year) - 1;
+
+	/* [+-]hhmm: / and % both keep the sign, so -0530 gives -5h -30min */
+	tz = atoi(args[7]);
+	off = (tz/100)*60L*60L + (tz%100)*60L;
+
+	/* wall-clock time = UTC + offset, hence UTC = wall-clock - offset */
+	return tm2sec(&ret) - off;
+}
\ No newline at end of file
--- /dev/null
+++ b/fetchnews.rc
@@ -1,0 +1,12 @@
+#!/bin/rc
+# Fetch every configured feed and pipe it into the locally built
+# rssfill binary ($O.out in the current directory).
+
+# object-code letter of the binary to run; must match the architecture
+# the program was built for (NOTE(review): 6 is presumably amd64)
+O=6
+
+# parallel lists: prefixes($i) is handed to -p for urls($i)
+urls=( https://www.tagesschau.de/xml/rss2 https://lukesmith.xyz/rss.xml )
+prefixes=( tschau lukesmith )
+
+# /lib/news is the program's default output directory;
+# NOTE(review): presumably this mounts a fresh ramfs there - confirm
+ramfs -m /lib/news
+
+# -c makes the program report each file name on standard error
+for(i in `{seq 1 $#urls}){
+ hget $urls($i) | $O.out -c -p $prefixes($i)
+}
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,7 @@
+# Build the single program rssfill from rssfill.c.
+</$objtype/mkfile
+
+TARG=rssfill
+OFILES=rssfill.$O
+# headers included by rssfill.c; date.h holds function definitions,
+# not just declarations
+HFILES=rssfill.h xmlpull.h date.h
+
+</sys/src/cmd/mkone
--- /dev/null
+++ b/rssfill.c
@@ -1,0 +1,321 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "xmlpull.h"
+#include "rssfill.h"
+#include "date.h"
+
+/* directory the item files are created in; replaced by -d */
+char *directory = "/lib/news";
+/* text placed in front of every generated file name; replaced by -p */
+char *prefix = "";
+
+/* non-zero: name each file on standard error (-c, also forced by -t) */
+int chatty = 0;
+/* non-zero: set by -t */
+int dry = 0;
+
+/*
+ * Print the command synopsis on standard error and terminate the
+ * program with exit status "usage".
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: %s [ -ct ] [ -p prefix ] [ -d directory ]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * Write one file for every newly added item (s == 2) of the list that
+ * f belongs to.  The list is walked from its last element back to f's
+ * predecessors via the p links.
+ *
+ * Each file is named directory/prefix<seconds>, where <seconds> is the
+ * item's pubDate as returned by parsedate, and its modification time
+ * is set to that date.  An item without a pubDate cannot be named and
+ * is skipped with a message on standard error.
+ *
+ * With chatty set the file name is reported on standard error; with
+ * dry set nothing is created or written.  Failure to create a file is
+ * fatal.
+ */
+void
+writefeedfiles(Feed *f)
+{
+	int fd;
+	char file[1024];
+	long d;
+	Dir dir;
+
+	if(f == nil)
+		return;
+
+	/* find the tail; the loop below runs backwards from there */
+	while(f->n != nil)
+		f = f->n;
+
+	for(; f != nil; f = f->p){
+		if(f->s != 2)
+			continue;
+		if(f->date == nil){
+			fprint(2, "skipping item without pubDate: %s\n",
+				f->title != nil ? f->title : "(untitled)");
+			continue;
+		}
+
+		d = parsedate(f->date);
+		snprint(file, sizeof file, "%s/%s%ld", directory, prefix, d);
+
+		if(chatty)
+			fprint(2, "writing file %s\n", file);
+		/* a dry run must not touch the file system */
+		if(dry)
+			continue;
+
+		/* create returns -1 on failure; 0 is a valid descriptor */
+		fd = create(file, OWRITE, 0666);
+		if(fd < 0)
+			sysfatal("error creating file %s: %r", file);
+
+		if(f->title != nil)
+			fprint(fd, "title: %s\n", f->title);
+		fprint(fd, "pubDate: %s (%ld)\n", f->date, d);
+		if(f->link != nil)
+			fprint(fd, "link: %s\n", f->link);
+		if(f->desc != nil)
+			fprint(fd, "\n%s\n", f->desc);
+
+		/* change only the mtime; nulldir marks the rest "don't touch" */
+		nulldir(&dir);
+		dir.mtime = d;
+		if(dirfwstat(fd, &dir) < 0)
+			fprint(2, "cannot set mtime of %s: %r\n", file);
+
+		close(fd);
+	}
+}
+
+/*
+ * Release a single item together with the strings it owns.
+ * The neighbours' n/p links are left untouched.  A nil f is ignored.
+ */
+void
+freefeed(Feed *f)
+{
+	if(f == nil)
+		return;
+
+	/* free accepts nil, so unset fields need no special case */
+	free(f->title);
+	free(f->link);
+	free(f->desc);
+	free(f->date);
+	free(f);
+}
+
+/*
+ * Release every item from r to the end of the list, following the
+ * n links.  Items in front of r are not touched.
+ */
+void
+freefeedt(Feed *r)
+{
+	Feed *next;
+
+	for(; r != nil; r = next){
+		/* remember the successor before the node goes away */
+		next = r->n;
+		freefeed(r);
+	}
+}
+
+/* True when both strings are present and equal. */
+static int
+samefield(char *a, char *b)
+{
+	return a != nil && b != nil && strcmp(a, b) == 0;
+}
+
+/*
+ * Look for an item already in the list starting at r that shares at
+ * least one field with the given values: title, link, description or
+ * date, tested in that order.  Fields that are nil on either side
+ * never match.
+ *
+ * The first matching item is marked as seen (s = 1) and returned;
+ * nil is returned when nothing matches.
+ */
+Feed *
+searchfeed(Feed *r, char *title, char *link, char *desc, char *date)
+{
+	Feed *e;
+
+	for(e = r; e != nil; e = e->n){
+		if(samefield(e->title, title)
+		|| samefield(e->link, link)
+		|| samefield(e->desc, desc)
+		|| samefield(e->date, date)){
+			e->s = 1;
+			return e;
+		}
+	}
+	return nil;
+}
+
+/*
+ * Append item f to the end of the list headed by r and mark it as
+ * new (s = 2).  Returns the head of the list, which is f itself when
+ * the list was empty.
+ */
+Feed *
+addfeed(Feed *r, Feed *f)
+{
+	Feed *tail;
+
+	f->s = 2;
+	if(r == nil)
+		return f;
+
+	for(tail = r; tail->n != nil; tail = tail->n)
+		;
+	tail->n = f;
+	f->p = tail;
+
+	return r;
+}
+
+/*
+ * Unlink item f from the list headed by r and free it.
+ *
+ * Returns the head of the list after the removal: f's successor when
+ * f was the head, nil when f was the only element, r otherwise.
+ * f must not be used by the caller afterwards.
+ */
+Feed *
+removefeed(Feed *r, Feed *f)
+{
+	if(f == nil)
+		return r;
+
+	/* a head that goes away is replaced by its successor, if any */
+	if(r == f || (f->p == nil && f->n != nil))
+		r = f->n;
+
+	/* close the gap between the neighbours */
+	if(f->p != nil)
+		f->p->n = f->n;
+	if(f->n != nil)
+		f->n->p = f->p;
+
+	freefeed(f);
+
+	return r;
+}
+
+/*
+ * Age the list headed by r: items whose mark is still 0 are removed
+ * and freed, every other item has its mark reset to 0.
+ * Returns the head of the list as reported by removefeed.
+ */
+Feed *
+checkfeed(Feed *r)
+{
+	Feed *a, *next;
+
+	for(a = r; a != nil; a = next){
+		/* removefeed frees a, so fetch the successor first */
+		next = a->n;
+		if(a->s == 0)
+			r = removefeed(r, a);
+		else
+			a->s = 0;
+	}
+
+	return r;
+}
+
+/* Replace *dst with a copy of src, releasing any earlier value. */
+static void
+setfield(char **dst, char *src)
+{
+	free(*dst);
+	*dst = strdup(src);
+	if(*dst == nil)
+		sysfatal("strdup: %r");
+}
+
+/*
+ * Read an RSS document from standard input and write one file per
+ * item.
+ *
+ * Options:
+ *	-d directory	where the files are created
+ *	-p prefix	text placed in front of each file name
+ *	-c		report each file name on standard error
+ *	-t		dry run; implies -c
+ *
+ * A small state machine (st) tracks which element of an item the
+ * parser is in.  A finished item is appended to the list r unless
+ * searchfeed finds an item sharing a field with it.  When the closing
+ * tag of the document's outer element is seen, the new items are
+ * written out and the list is aged with checkfeed.
+ */
+void
+main(int argc, char **argv)
+{
+	xmlpull *x, *a;
+	char st;
+	Feed *f, *r;
+
+	ARGBEGIN {
+	case 'd':
+		directory = EARGF(usage());
+		break;
+	case 'p':
+		prefix = EARGF(usage());
+		break;
+	case 't':
+		dry = 1;
+		break;
+	case 'c':
+		chatty = 1;
+		break;
+	default:
+		/* reject unknown options instead of ignoring them */
+		usage();
+	} ARGEND;
+
+	if(dry)
+		chatty = 1;
+
+	st = NONE;
+	f = nil;
+	r = nil;
+
+	x = openxmlpull(0);
+	if(x == nil)
+		sysfatal("openxmlpull: %r");
+
+	while((a = nextxmlpull(x)) != nil && st != END){
+		switch(a->ev){
+		case START_DOCUMENT:
+			break;
+		case START_TAG:
+			if(!strcmp(x->na, "item") || !strcmp(x->na, "entry")){
+				/* drop an item that was never closed */
+				if(f != nil)
+					freefeed(f);
+				f = mallocz(sizeof(Feed), 1);
+				if(f == nil)
+					sysfatal("mallocz: %r");
+				st = ITEM;
+				break;
+			}
+			/* children only count directly inside an item */
+			if(st != ITEM)
+				break;
+			if(!strcmp(x->na, "title"))
+				st = TITLE;
+			else if(!strcmp(x->na, "description"))
+				st = DESC;
+			else if(!strcmp(x->na, "link"))
+				st = LINK;
+			else if(!strcmp(x->na, "pubDate"))
+				st = DATE;
+			break;
+		case START_END_TAG:
+			break;
+		case ATTR:
+			/* a link may carry its target in an href attribute */
+			if(!strcmp(x->na, "href") && st == LINK)
+				setfield(&f->link, x->va);
+			break;
+		case TEXT:
+			/* for text events the content is delivered in x->na */
+			switch(st){
+			case TITLE:
+				setfield(&f->title, x->na);
+				break;
+			case LINK:
+				setfield(&f->link, x->na);
+				break;
+			case DESC:
+				setfield(&f->desc, x->na);
+				break;
+			case DATE:
+				setfield(&f->date, x->na);
+				break;
+			default:
+				break;
+			}
+			break;
+		case END_TAG:
+			if((!strcmp(x->na, "item") || !strcmp(x->na, "entry")) && st == ITEM){
+				/* keep the item only if nothing like it is listed */
+				if(searchfeed(r, f->title, f->link, f->desc, f->date) == nil)
+					r = addfeed(r, f);
+				else
+					freefeed(f);
+				f = nil;
+
+				st = NONE;
+				break;
+			}
+			if(!strcmp(x->na, "title") && st == TITLE){
+				st = ITEM;
+				break;
+			}
+			if(!strcmp(x->na, "link") && st == LINK){
+				st = ITEM;
+				break;
+			}
+			if(!strcmp(x->na, "description") && st == DESC){
+				st = ITEM;
+				break;
+			}
+			if(!strcmp(x->na, "pubDate") && st == DATE){
+				st = ITEM;
+				break;
+			}
+			if(!strcmp(x->na, "rdf:RDF") || !strcmp(x->na, "items")
+			|| !strcmp(x->na, "rss") || !strcmp(x->na, "feed")){
+				writefeedfiles(r);
+				r = checkfeed(r);
+				break;
+			}
+			break;
+		case END_DOCUMENT:
+			st = END;
+			break;
+		default:
+			sysfatal("Error, should never happen: %x", x->ev);
+			break;
+		}
+	}
+	freexmlpull(x);
+	/* an item still open at end of input was never added to the list */
+	freefeed(f);
+	freefeedt(r);
+	exits(nil);
+}
--- /dev/null
+++ b/rssfill.h
@@ -1,0 +1,20 @@
+/*
+ * One feed item, kept as a node of a doubly linked list.
+ * The strings are heap copies owned by the node (freefeed releases
+ * them); a field stays nil when the item did not supply it.
+ */
+typedef struct Feed Feed;
+struct Feed {
+ /* text of the item's title element */
+ char *title;
+ /* text of the link element, or its href attribute */
+ char *link;
+ /* text of the description element */
+ char *desc;
+ /* text of the pubDate element, unparsed */
+ char *date;
+ /*
+  * mark: 2 = just added (addfeed), 1 = matched by searchfeed,
+  * 0 = neither; checkfeed removes items at 0 and resets the others
+  */
+ int s;
+ /* next item, nil at the tail */
+ Feed *n;
+ /* previous item, nil at the head */
+ Feed *p;
+};
+
+/* Parser states used by main to track the position in the document. */
+enum {
+ /* outside any item */
+ NONE = 0x00,
+ /* inside an item or entry element, not inside a tracked child */
+ ITEM,
+ /* inside the item's title element */
+ TITLE,
+ /* inside the item's link element */
+ LINK,
+ /* inside the item's description element */
+ DESC,
+ /* inside the item's pubDate element */
+ DATE,
+ /* the end of the document was reported; main stops reading */
+ END,
+};
--- /dev/null
+++ b/xmlpull.h
@@ -1,0 +1,51 @@
+/*
+ * Copy me if you can.
+ * by 20h
+ */
+
+/*
+ * When nil is already defined, request libxmlpull.a via #pragma lib.
+ * NOTE(review): nil is presumably defined by Plan 9's libc.h, making
+ * this the Plan 9 branch - confirm.
+ */
+#ifdef nil
+#pragma lib "libxmlpull.a"
+#endif
+
+#ifndef XMLPULL_H
+#define XMLPULL_H
+
+/*
+ * Where nil is not defined, map the names nil, print, snprint and
+ * exits onto NULL, printf, snprintf and return.
+ */
+#ifndef nil
+#define nil NULL
+#define print printf
+#define snprint snprintf
+#define exits return
+#endif
+
+/*
+ * Event codes stored in xmlpull.ev.
+ * rssfill.c handles START_DOCUMENT, START_TAG, START_END_TAG, TEXT,
+ * ATTR, END_TAG and END_DOCUMENT; it treats any other code as fatal.
+ * NOTE(review): the exact meaning of TEXT_C, END_TAG_S and END_TAG_N
+ * is defined by the library source, which is not part of this patch.
+ */
+enum {
+ START_DOCUMENT = 0x0,
+ START_TAG,
+ START_END_TAG,
+ TEXT,
+ TEXT_C,
+ ATTR,
+ END_TAG,
+ END_TAG_S,
+ END_TAG_N,
+ END_DOCUMENT,
+};
+
+/*
+ * Parser handle, created by openxmlpull and advanced by nextxmlpull.
+ * Only ev, na and va are read by rssfill.c; the remaining fields are
+ * not used there and are left undocumented here.
+ */
+typedef struct xmlpull xmlpull;
+struct xmlpull {
+ /* NOTE(review): presumably the descriptor given to openxmlpull */
+ int fd;
+ /* code of the current event, one of the enum values above */
+ char ev;
+ char nev;
+ char *lm;
+ /*
+  * as used by rssfill.c: element name for tag events, attribute
+  * name for ATTR, the text itself for TEXT
+  */
+ char *na;
+ /* as used by rssfill.c: attribute value for ATTR */
+ char *va;
+ int la;
+ int lv;
+ int ln;
+};
+
+/* release a parser obtained from openxmlpull */
+void freexmlpull(xmlpull *x);
+/* create a parser for fd; rssfill.c passes 0 */
+xmlpull *openxmlpull(int fd);
+/* advance to the next event; rssfill.c stops reading when this returns nil */
+xmlpull *nextxmlpull(xmlpull *x);
+/* not used by rssfill.c */
+xmlpull *writexmlpull(xmlpull *x);
+
+#endif