ref: 8fdbac99c8c167acc77ca43661a50e9360c905f8
author: sirjofri <sirjofri@sirjofri.de>
date: Mon Jun 8 17:42:51 EDT 2020
adds first working program. Only works with rss feeds (no atom!)
--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,2 @@
+*.[0125678qv]
+[0125678qv].out
--- /dev/null
+++ b/date.h
@@ -1,0 +1,116 @@
+/*
+ * str2mon maps a three-letter English month abbreviation ("Jan" .. "Dec")
+ * to its zero-based index (0 .. 11).  The match is exact and
+ * case-sensitive.  Anything else yields 0, so an unknown name cannot be
+ * told apart from "Jan" by the caller.
+ */
+int
+str2mon(char *s)
+{
+	static char *names[] = {
+		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+	};
+	int m;
+
+	for(m = 0; m < (int)(sizeof names / sizeof names[0]); m++)
+		if(strcmp(s, names[m]) == 0)
+			return m;
+	return 0;
+}
+
+/*
+ * str2wday maps a three-letter English weekday abbreviation
+ * ("Sun" .. "Sat") to its index with Sunday = 0.  The match is exact
+ * and case-sensitive.  Anything else yields 0, the same value as "Sun".
+ */
+int
+str2wday(char *s)
+{
+	static char *names[] = {
+		"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat",
+	};
+	int d;
+
+	for(d = 0; d < (int)(sizeof names / sizeof names[0]); d++)
+		if(strcmp(s, names[d]) == 0)
+			return d;
+	return 0;
+}
+
+/*
+ * doty returns the one-based day of the year for a calendar date
+ * (1 January is day 1).
+ *
+ *	day	day of the month, counted from 1
+ *	month	zero-based month (0 = January); months outside 0..11
+ *		contribute nothing
+ *	year	either a full Gregorian year (2020) or a Tm-style count
+ *		of years since 1900 (120); values below 1900 are taken
+ *		to be the latter
+ *
+ * February has 29 days when the year is divisible by 4 and is not a
+ * century year, or when it is divisible by 400.
+ */
+int
+doty(int day, int month, int year)
+{
+	static int mdays[12] = {
+		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+	};
+	int n, i, leap;
+
+	/* the leap rule needs the real year; year%400 is wrong on an offset */
+	if(year < 1900)
+		year += 1900;
+	leap = year%4 == 0 && (year%100 != 0 || year%400 == 0);
+
+	n = 0;
+	for(i = 0; i < month && i < 12; i++){
+		n += mdays[i];
+		if(i == 1 && leap)
+			n++;
+	}
+
+	return n + day;
+}
+
+/*
+ * parsedate converts an RSS pubDate such as
+ *	"Mon, 08 Jun 2020 17:42:51 +0200"
+ * into the value tm2sec computes for the broken-down time, minus the
+ * numeric zone offset.
+ *
+ * The string is split on ',', ' ' and ':' and must yield at least
+ * eight fields: weekday, day, month, year, hour, minute, second, zone.
+ * The zone is read with atoi as [+-]HHMM; a zone name such as "GMT"
+ * therefore counts as offset 0.
+ *
+ * Calls sysfatal when s is nil, does not fit the local buffer, or has
+ * fewer than eight fields.
+ */
+long
+parsedate(char *s)
+{
+	Tm ret;
+	char input[64];
+	char *args[8];
+	int n, off;
+
+	if(s == nil)
+		sysfatal("error parsing pubDate: no date given");
+	/* getfields writes into its input, so work on a bounded copy */
+	if(strlen(s) >= sizeof input)
+		sysfatal("error parsing pubDate: %s", s);
+	strcpy(input, s);
+	n = getfields(input, args, 8, 1, ", :");
+
+	if(n < 8)
+		sysfatal("error parsing pubDate: %s", s);
+
+	/*
+	 * Clear everything first: fields that are not parsed here (zone
+	 * and whatever else the local Tm carries) must not reach tm2sec
+	 * as stack garbage.
+	 */
+	memset(&ret, 0, sizeof ret);
+	ret.wday = str2wday(args[0]);
+	ret.mday = atoi(args[1]);
+	ret.mon = str2mon(args[2]);
+	ret.year = atoi(args[3]) - 1900;
+	ret.hour = atoi(args[4]);
+	ret.min = atoi(args[5]);
+	ret.sec = atoi(args[6]);
+	off = atoi(args[7]);
+
+	/* doty counts from 1; ctime(2) documents Tm.yday as 0..365 */
+	ret.yday = doty(ret.mday, ret.mon, ret.year) - 1;
+
+	/* keep the minutes of offsets such as +0530 instead of dropping them */
+	return tm2sec(&ret) - ((off/100)*60*60 + (off%100)*60);
+}
\ No newline at end of file
--- /dev/null
+++ b/fetchnews.rc
@@ -1,0 +1,12 @@
+#!/bin/rc
+
+# Fetch each configured feed with hget and pipe it into the rssfill
+# binary built in the current directory.
+
+# object-file letter of the binary to run ($O.out); 6 is hard-coded here
+O=6
+
+# urls and prefixes are parallel lists: feed number i is written with
+# file-name prefix number i, so both lists must have the same length
+urls=( https://www.tagesschau.de/xml/rss2 https://lukesmith.xyz/rss.xml )
+prefixes=( tschau lukesmith )
+
+# /lib/news is the default output directory of rssfill; put a ramfs on it
+ramfs -m /lib/news
+
+# -c makes rssfill report each file, -p sets the file-name prefix
+for(i in `{seq 1 $#urls}){+ hget $urls($i) | $O.out -c -p $prefixes($i)
+}
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,7 @@
+# mk build description for rssfill.
+# Architecture settings come from the per-$objtype mkfile; $O below is
+# presumably defined there.
+</$objtype/mkfile
+
+# program name, its single object file, and the headers it depends on
+TARG=rssfill
+OFILES=rssfill.$O
+HFILES=rssfill.h xmlpull.h date.h
+
+# shared rules for building one program (see /sys/src/cmd/mkone)
+</sys/src/cmd/mkone
--- /dev/null
+++ b/rssfill.c
@@ -1,0 +1,321 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "xmlpull.h"
+#include "rssfill.h"
+#include "date.h"
+
+/* directory the item files are created in; replaced by -d */
+char *directory = "/lib/news";
+/* text placed in front of each file name; replaced by -p */
+char *prefix = "";
+
+/* set by -c (and implied by -t): report every file on fd 2 */
+int chatty = 0;
+/* set by -t: skip writing the item contents */
+int dry = 0;
+
+/*
+ * usage prints the command synopsis on file descriptor 2 and
+ * terminates the program with exit status "usage".
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: %s [ -ct ] [ -p prefix ] [ -d directory ]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * writefeedfiles writes one file per new item (s == 2) of the list
+ * that f belongs to.
+ *
+ * The list is walked from its last node back to the first through the
+ * p links.  Each file is named <directory>/<prefix><seconds>, where
+ * seconds is parsedate() of the item's date; it holds the title,
+ * pubDate, link and description that are present, and its mtime is
+ * set to the same number of seconds.
+ *
+ * With chatty set every file name is reported on fd 2.  With dry set
+ * nothing is created or written.  Items without a date are reported
+ * and skipped, because the file name is derived from the date.
+ * A failing create is fatal.
+ */
+void
+writefeedfiles(Feed *f)
+{
+	int fd;
+	char file[1024];
+	long d;
+	Dir dir;
+
+	if(f == nil)
+		return;
+
+	/* start at the tail and work towards the head */
+	while(f->n != nil)
+		f = f->n;
+
+	for(; f != nil; f = f->p){
+		if(f->s != 2)
+			continue;
+		if(f->date == nil){
+			fprint(2, "skipping item without pubDate: %s\n",
+				f->title != nil ? f->title : "(untitled)");
+			continue;
+		}
+
+		d = parsedate(f->date);
+		snprint(file, sizeof file, "%s/%s%ld", directory, prefix, d);
+
+		if(chatty)
+			fprint(2, "writing file %s\n", file);
+		/* a dry run must not touch the file system at all */
+		if(dry)
+			continue;
+
+		/* create reports failure with -1; 0 is a valid descriptor */
+		fd = create(file, OWRITE, 0666);
+		if(fd < 0)
+			sysfatal("error creating file %s: %r", file);
+
+		if(f->title != nil)
+			fprint(fd, "title: %s\n", f->title);
+		fprint(fd, "pubDate: %s (%ld)\n", f->date, d);
+		if(f->link != nil)
+			fprint(fd, "link: %s\n", f->link);
+		if(f->desc != nil)
+			fprint(fd, "\n%s\n", f->desc);
+
+		/* stamp the file with the publication time */
+		nulldir(&dir);
+		dir.mtime = d;
+		if(dirfwstat(fd, &dir) < 0)
+			fprint(2, "cannot set mtime of %s: %r\n", file);
+
+		close(fd);
+	}
+}
+
+/*
+ * freefeed releases one item together with the strings it owns.
+ * It does not touch the neighbours' n/p links.  A nil f is ignored.
+ */
+void
+freefeed(Feed *f)
+{
+	if(f == nil)
+		return;
+
+	/* free accepts nil, so unset fields need no special case */
+	free(f->title);
+	free(f->link);
+	free(f->desc);
+	free(f->date);
+	free(f);
+}
+
+/*
+ * freefeedt frees r and every node after it.  While a node has a
+ * successor, the walk steps forward and frees the node behind it
+ * through the successor's p link; the final node is freed directly.
+ */
+void
+freefeedt(Feed *r)
+{
+	for(;;){
+		if(r == nil)
+			return;
+		if(r->n == nil){
+			freefeed(r);
+			return;
+		}
+		r = r->n;
+		freefeed(r->p);
+	}
+}
+
+/* samefield reports whether both strings are set and compare equal */
+static int
+samefield(char *a, char *b)
+{
+	return a != nil && b != nil && strcmp(a, b) == 0;
+}
+
+/*
+ * searchfeed looks from r onwards for the first item that shares its
+ * title, link, description or date with the arguments; one matching
+ * field is enough, and nil fields never match.  The item found is
+ * marked as seen (s = 1) and returned.  Returns nil if nothing matches.
+ */
+Feed *
+searchfeed(Feed *r, char *title, char *link, char *desc, char *date)
+{
+	Feed *it;
+
+	for(it = r; it != nil; it = it->n){
+		if(samefield(it->title, title)
+		|| samefield(it->link, link)
+		|| samefield(it->desc, desc)
+		|| samefield(it->date, date)){
+			it->s = 1;
+			return it;
+		}
+	}
+	return nil;
+}
+
+/*
+ * addfeed marks f as new (s = 2) and appends it to the list headed by
+ * r.  Returns the head of the list, which is f itself when r is nil.
+ * f must not be nil.
+ */
+Feed *
+addfeed(Feed *r, Feed *f)
+{
+	Feed *tail;
+
+	f->s = 2;
+	if(r == nil)
+		return f;
+
+	for(tail = r; tail->n != nil; tail = tail->n)
+		;
+	tail->n = f;
+	f->p = tail;
+
+	return r;
+}
+
+/*
+ * removefeed unlinks f from the list headed by r and frees it.
+ *
+ * Returns the head of the list afterwards: r itself, or the node after
+ * f when f had no predecessor.  That is nil when f was the only node,
+ * so the caller never gets the freed node back.  A nil f leaves the
+ * list untouched.
+ */
+Feed *
+removefeed(Feed *r, Feed *f)
+{
+	if(f == nil)
+		return r;
+
+	if(f->p != nil)
+		f->p->n = f->n;
+	else
+		r = f->n;	/* f was the head; this is nil for a one-node list */
+	if(f->n != nil)
+		f->n->p = f->p;
+
+	freefeed(f);
+
+	return r;
+}
+
+/*
+ * checkfeed finishes one pass over a feed: items whose mark is still 0
+ * are removed from the list and freed, and every other item has its
+ * mark reset to 0 for the next pass.  Returns the head of the list
+ * after the removals.
+ */
+Feed *
+checkfeed(Feed *r)
+{
+	Feed *a, *next;
+
+	for(a = r; a != nil; a = next){
+		/* removefeed frees a, so fetch the successor beforehand */
+		next = a->n;
+		if(a->s == 0)
+			r = removefeed(r, a);
+		else
+			a->s = 0;
+	}
+
+	return r;
+}
+
+/*
+ * setfield replaces *dst with a private copy of s and frees the value
+ * held before, so a field delivered in several events does not leak.
+ * A nil s clears the field.
+ */
+static void
+setfield(char **dst, char *s)
+{
+	free(*dst);
+	*dst = nil;
+	if(s == nil)
+		return;
+	*dst = strdup(s);
+	if(*dst == nil)
+		sysfatal("strdup: %r");
+}
+
+/*
+ * main reads an RSS document from standard input with xmlpull and
+ * collects its <item> (or <entry>) elements in a list.  When the
+ * enclosing rss, feed, rdf:RDF or items element closes, the new items
+ * are written out with writefeedfiles and the list is tidied with
+ * checkfeed.
+ *
+ * Options:
+ *	-d directory	where the item files go
+ *	-p prefix	text put in front of every file name
+ *	-t		dry run; implies -c
+ *	-c		report each file on fd 2
+ * Any other option prints the usage line and exits.
+ *
+ * st tracks the element the parser is in: ITEM inside an item, and
+ * TITLE, LINK, DESC or DATE inside the matching child, so that TEXT
+ * and ATTR events land in the right field of the item being built (f).
+ */
+void
+main(int argc, char **argv)
+{
+	xmlpull *x, *a;
+	char st;
+	Feed *f, *r;
+
+	ARGBEGIN {
+	case 'd':
+		directory = EARGF(usage());
+		break;
+	case 'p':
+		prefix = EARGF(usage());
+		break;
+	case 't':
+		dry = 1;
+		break;
+	case 'c':
+		chatty = 1;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if(dry)
+		chatty = 1;
+
+	st = NONE;
+	f = nil;
+	r = nil;
+
+	x = openxmlpull(0);
+	if(x == nil)
+		sysfatal("openxmlpull: %r");
+
+	while((a = nextxmlpull(x)) != nil && st != END){
+		switch(a->ev){
+		case START_DOCUMENT:
+			break;
+		case START_TAG:
+			if(!strcmp(x->na, "item") || !strcmp(x->na, "entry")){
+				/* drop an item that was never closed */
+				freefeed(f);
+				f = mallocz(sizeof(Feed), 1);
+				if(f == nil)
+					sysfatal("mallocz: %r");
+				st = ITEM;
+				break;
+			}
+			if(st != ITEM)
+				break;
+			if(!strcmp(x->na, "title"))
+				st = TITLE;
+			else if(!strcmp(x->na, "description"))
+				st = DESC;
+			else if(!strcmp(x->na, "link"))
+				st = LINK;
+			else if(!strcmp(x->na, "pubDate"))
+				st = DATE;
+			break;
+		case START_END_TAG:
+			break;
+		case ATTR:
+			/* <link href="..."/> carries the target in an attribute */
+			if(!strcmp(x->na, "href") && st == LINK)
+				setfield(&f->link, x->va);
+			break;
+		case TEXT:
+			switch(st){
+			case TITLE:
+				setfield(&f->title, x->na);
+				break;
+			case LINK:
+				setfield(&f->link, x->na);
+				break;
+			case DESC:
+				setfield(&f->desc, x->na);
+				break;
+			case DATE:
+				setfield(&f->date, x->na);
+				break;
+			default:
+				break;
+			}
+			break;
+		case END_TAG:
+			if((!strcmp(x->na, "item") || !strcmp(x->na, "entry")) && st == ITEM){
+				/* keep the item only if no equal one is listed yet */
+				if(searchfeed(r, f->title, f->link, f->desc, f->date) == nil)
+					r = addfeed(r, f);
+				else
+					freefeed(f);
+				f = nil;
+
+				st = NONE;
+				break;
+			}
+			if(!strcmp(x->na, "title") && st == TITLE){
+				st = ITEM;
+				break;
+			}
+			if(!strcmp(x->na, "link") && st == LINK){
+				st = ITEM;
+				break;
+			}
+			if(!strcmp(x->na, "description") && st == DESC){
+				st = ITEM;
+				break;
+			}
+			if(!strcmp(x->na, "pubDate") && st == DATE){
+				st = ITEM;
+				break;
+			}
+			if(!strcmp(x->na, "rdf:RDF") || !strcmp(x->na, "items")
+			|| !strcmp(x->na, "rss") || !strcmp(x->na, "feed")){
+				writefeedfiles(r);
+				r = checkfeed(r);
+				break;
+			}
+			break;
+		case END_DOCUMENT:
+			st = END;
+			break;
+		default:
+			sysfatal("Error, should never happen: %x", x->ev);
+			break;
+		}
+	}
+
+	/* an item still under construction when the input ended */
+	freefeed(f);
+	freexmlpull(x);
+	freefeedt(r);
+	exits(nil);
+}
--- /dev/null
+++ b/rssfill.h
@@ -1,0 +1,20 @@
+/*
+ * Feed is one news item and at the same time a node of the doubly
+ * linked list the items are kept in.  The four strings are owned by
+ * the node (freefeed frees them) and are nil when the item had no
+ * such element.
+ *
+ * s is the node's mark: addfeed sets 2 (new, to be written by
+ * writefeedfiles), searchfeed sets 1 (seen again), and checkfeed
+ * removes nodes still at 0 and resets the others to 0.
+ */
+typedef struct Feed Feed;
+struct Feed {+ char *title;
+ char *link;
+ char *desc;
+ char *date;	/* pubDate text as it appears in the feed */
+ int s;
+ Feed *n;	/* next node, nil at the tail */
+ Feed *p;	/* previous node, nil at the head */
+};
+
+/*
+ * Parser states kept in main's st variable: NONE outside any item,
+ * ITEM directly inside <item>/<entry>, TITLE/LINK/DESC/DATE inside the
+ * corresponding child element, END once END_DOCUMENT has been seen.
+ */
+enum {+ NONE = 0x00,
+ ITEM,
+ TITLE,
+ LINK,
+ DESC,
+ DATE,
+ END,
+};
--- /dev/null
+++ b/xmlpull.h
@@ -1,0 +1,51 @@
+/*
+ * Copy me if you can.
+ * by 20h
+ */
+
+/*
+ * nil is used as the switch between two build environments: where it
+ * is already defined (presumably Plan 9, after <libc.h>) the library
+ * is requested with #pragma lib; elsewhere the Plan 9 names used by
+ * the code are mapped onto their C library counterparts below.
+ */
+#ifdef nil
+#pragma lib "libxmlpull.a"
+#endif
+
+#ifndef XMLPULL_H
+#define XMLPULL_H
+
+/* NOTE(review): "exits" becomes a plain return statement here, so it only works inside main */
+#ifndef nil
+#define nil NULL
+#define print printf
+#define snprint snprintf
+#define exits return
+#endif
+
+/*
+ * Event codes found in xmlpull.ev.  rssfill's main handles
+ * START_DOCUMENT, START_TAG, START_END_TAG, TEXT, ATTR, END_TAG and
+ * END_DOCUMENT and treats any other value as a fatal error.
+ * NOTE(review): TEXT_C, END_TAG_S and END_TAG_N look like internal
+ * states of the parser - confirm against the library source.
+ */
+enum { + START_DOCUMENT = 0x0,
+ START_TAG,
+ START_END_TAG,
+ TEXT,
+ TEXT_C,
+ ATTR,
+ END_TAG,
+ END_TAG_S,
+ END_TAG_N,
+ END_DOCUMENT,
+};
+
+/*
+ * Parser handle returned by openxmlpull.  Only the fields rssfill
+ * reads are described with confidence; the rest are internal to the
+ * library, whose source is not part of this change.
+ */
+typedef struct xmlpull xmlpull;
+struct xmlpull {+ int fd;
+ char ev;	/* event code of the current event (see the enum above) */
+ char nev;	/* presumably the event queued next - confirm */
+ char *lm;
+ char *na;	/* tag or attribute name; for TEXT events rssfill reads the text from here */
+ char *va;	/* attribute value on ATTR events */
+ int la;
+ int lv;
+ int ln;
+};
+
+/*
+ * Library entry points.  As used by rssfill: openxmlpull(fd) creates a
+ * parser reading from fd, nextxmlpull advances to the next event and
+ * returns nil when there is none, and freexmlpull releases the parser.
+ * writexmlpull is not used by rssfill; presumably it writes the
+ * current event back out as XML - confirm against the library.
+ */
+void freexmlpull(xmlpull *x);
+xmlpull *openxmlpull(int fd);
+xmlpull *nextxmlpull(xmlpull *x);
+xmlpull *writexmlpull(xmlpull *x);
+
+#endif
--
⑨