ref: 62e0c1460b517f101896a5079378e83e4bcd8483
parent: 99636f59fa87c0b351b0fa934970ff63a889d608
author: sirjofri <sirjofri@sirjofri.de>
date: Mon Apr 19 08:23:06 EDT 2021
adds xmlpull including CDATA support.
--- a/mkfile
+++ b/mkfile
@@ -2,7 +2,8 @@
BIN=$home/bin/$objtype
TARG=rssfill
-OFILES=rssfill.$O
+OFILES=rssfill.$O xmlpull.$O
HFILES=rssfill.h xmlpull.h
+CFLAGS=-DPLAN9
</sys/src/cmd/mkone
--- a/rssfill.c
+++ b/rssfill.c
@@ -24,11 +24,11 @@
writefeedfiles(Feed *f)
{
int fd;
- char file[1024];
+ char *file = nil;
long d;
Tm t;
Dir dir;
-
+
if(f != nil){
while(f->n != nil)
f = f->n;
@@ -41,18 +41,20 @@
sysfatal("tmparse: %r");
else
fprint(2, "tmparse: auto parsed date\n");
-
+
d = tmnorm(&t);
-
- snprint(file, 1023, "%s/%s%ld", directory, prefix, d);
-
+
+ if(file)
+ free(file);
+ file = smprint("%s/%s%ld", directory, prefix, d);
+
fd = create(file, OWRITE, 0666);
if(!fd)
sysfatal("error creating file %s: %r", file);
-
+
if(chatty)
fprint(2, "writing file %s\n", file);
-
+
if(dry){
f = f->p;
continue;
@@ -67,11 +69,11 @@
fprint(fd, "\n%s\n", f->desc);
if(f->cont != nil)
fprint(fd, "\n%s\n", f->cont);
-
+
nulldir(&dir);
dir.mtime = d;
dirfwstat(fd, &dir);
-
+
close(fd);
}
f = f->p;
@@ -276,22 +278,28 @@
if(!strcmp(x->na, "href") && st == LINK)
f->link = strdup(x->va);
break;
+ case CDATA:
case TEXT:
switch(st){
case TITLE:
- f->title = strdup(x->na);
+ if (!f->title || strlen(f->title) == 0)
+ f->title = strdup(x->na);
break;
case LINK:
- f->link = strdup(x->na);
+ if (!f->link || strlen(f->link) == 0)
+ f->link = strdup(x->na);
break;
case DESC:
- f->desc = strdup(x->na);
+ if (!f->desc || strlen(f->desc) == 0)
+ f->desc = strdup(x->na);
break;
case CONTENT:
- f->cont = strdup(x->na);
+ if (!f->cont || strlen(f->cont) == 0)
+ f->cont = strdup(x->na);
break;
case DATE:
- f->date = strdup(x->na);
+ if (!f->date || strlen(f->date) == 0)
+ f->date = strdup(x->na);
break;
default:
break;
@@ -299,13 +307,13 @@
break;
case END_TAG:
if((!strcmp(x->na, "item") || !strcmp(x->na, "entry")) && st == ITEM){
- if(searchfeed(r, f->title, f->link, f->desc, f->date) == nil){
+ // if(searchfeed(r, f->title, f->link, f->desc, f->date) == nil){
r = addfeed(r, f);
f = nil;
- } else {
- freefeed(f);
- f = nil;
- }
+ // } else {
+ // freefeed(f);
+ // f = nil;
+ // }
st = NONE;
break;
--- /dev/null
+++ b/xmlpull.c
@@ -1,0 +1,467 @@
+/*
+ * Copy me if you can.
+ * by 20h
+ */
+
+#ifndef PLAN9
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#endif
+#ifdef PLAN9
+#include <u.h>
+#include <libc.h>
+#endif
+#include "xmlpull.h"
+
+/* reallocp: realloc p to s bytes, exiting on failure; a nonzero d zero-fills all s bytes */
+void *
+reallocp(void *p, int s, short d)
+{
+	void *q = realloc(p, s);
+
+	if(q == nil){
+		perror("realloc");
+		exits("realloc");
+	}
+	if(d)
+		memset(q, 0, s);
+	return q;
+}
+
+/* freexmlpull: release a parser and every buffer it owns; a nil x is a no-op */
+void
+freexmlpull(xmlpull *x)
+{
+	if(x == nil)
+		return;
+
+	free(x->na);
+	free(x->va);
+	/* lm holds the last start tag's name (saved by nextxmlpull) */
+	free(x->lm);
+	free(x);
+}
+
+/* openxmlpull: allocate a parser that reads from fd, with ev and nev at START_DOCUMENT */
+xmlpull *
+openxmlpull(int fd)
+{
+	xmlpull *x;
+
+	/* nonzero third argument makes reallocp zero-fill the struct */
+	x = reallocp(nil, sizeof(xmlpull), 2);
+	x->fd = fd;
+	x->ev = START_DOCUMENT;
+	x->nev = START_DOCUMENT;
+	x->na = nil;
+	x->va = nil;
+	x->lm = nil;
+	x->ln = x->lv = x->la = 0;
+
+	return x;
+}
+
+/* getchara: read one byte from x->fd; if read yields nothing, set ev to END_DOCUMENT and return 0 */
+char
+getchara(xmlpull *x)
+{
+	char c;
+
+	if(read(x->fd, &c, 1) > 0)
+		return c;
+
+	x->ev = END_DOCUMENT;
+	return '\0';
+}
+
+/* addchara: grow b by one byte, store c at the end, keep the NUL, bump *l */
+char *
+addchara(char *b, int *l, char c)
+{
+	b = reallocp(b, *l + 2, 0);	/* old length + c + NUL */
+	b[(*l)++] = c;
+	b[*l] = '\0';
+	return b;
+}
+
+/*
+ * readuntil: append input to b (length *l) up to w and return the buffer.  If t != 0,
+ * whitespace also ends the scan and '/' or '>' is appended and reported as nil; nil
+ * also means end of input.  On nil the (possibly moved) buffer is stored back into x.
+ */
+char *
+readuntil(xmlpull *x, char *b, int *l, char w, char t)
+{
+	char g;
+
+	while((g = getchara(x)) != 0){
+		if(g == w)
+			return addchara(b, l, '\0');
+		switch(g){
+		case '/':
+		case '>':
+			if(t != 0){
+				b = addchara(b, l, g);
+				goto giveup;
+			}
+			/* fall through: plain text when t == 0 */
+		case '\t':
+		case '\r':
+		case '\n':
+		case ' ':
+			if(t != 0)
+				return b;
+			break;
+		case '\\':
+			if((g = getchara(x)) == 0)
+				goto giveup;
+			break;
+		}
+		b = addchara(b, l, g);
+	}
+giveup:
+	if(l == &x->lv)
+		x->va = b;
+	else if(l == &x->ln)
+		x->na = b;
+	return nil;
+}
+
+/* parseattrib: read one name=value pair into x->na and x->va; nil if the tag ('/' or '>') or the input ends first */
+char *
+parseattrib(xmlpull *x)
+{
+	char g, *b;
+
+	while((g = getchara(x)) != 0){
+		/* skip whitespace until the first byte of the name (or the end of the tag) */
+		switch(g){
+		case '\t':
+		case '\r':
+		case '\n':
+		case ' ':
+			continue;
+		case '/':
+		case '>':
+			x->na = addchara(x->na, &x->ln, g);	/* left as the last byte of x->na */
+			return nil;
+		default:
+			x->na = addchara(x->na, &x->ln, g);
+			g = (char)0;	/* first name byte stored: makes the test below leave the loop */
+		}
+		if(g == (char)0)
+			break;
+	}
+	/* rest of the name: stops at '=', or earlier at whitespace since t is nonzero */
+	if((b = readuntil(x, x->na, &x->ln, '=', 2)) == nil)
+		return nil;
+	x->na = b;
+	/* next byte: the opening quote when the name ended at '=' */
+	if((g = getchara(x)) == 0)
+		return nil;
+
+	/* a quote starts a value that runs to the same quote; anything else scans to '>' */
+	switch(g){
+	case '"':
+	case '\'':
+		if((b = readuntil(x, x->va, &x->lv, g, 0)) == nil)
+			return nil;
+		x->va = b;
+		return x->va;
+	default:	/* NOTE(review): g itself is not stored, so an unquoted value loses its first byte */
+		if((b = readuntil(x, x->va, &x->lv, '>', 2)) == nil)
+			return nil;
+		x->va = b;
+		return x->na;
+	}
+
+	return x->na;	/* not reached: both switch arms return */
+}
+
+/*
+ * readname: append input bytes to x->na until a name delimiter shows up.
+ * The delimiter (whitespace, '>' or '/') is kept as the last byte of
+ * x->na.  Returns x->na, or nil when the input ends before any
+ * delimiter was read.
+ */
+char *
+readname(xmlpull *x)
+{
+	char c;
+	int stop;
+
+	while((c = getchara(x)) != 0){
+		x->na = addchara(x->na, &x->ln, c);
+
+		stop = c == ' ' || c == '\t' || c == '\r' || c == '\n';
+		stop = stop || c == '>' || c == '/';
+		if(stop)
+			return x->na;
+	}
+
+	return nil;
+}
+
+/* readcdata: append input to x->na up to the closing "]]>", which is cut off the string; returns x->na */
+char *
+readcdata(xmlpull *x)
+{
+	char g;
+
+	while((g = getchara(x)) != 0){
+		x->na = addchara(x->na, &x->ln, g);
+		/* under 3 bytes there is no room for "]]>": never look in front of na */
+		if(x->ln >= 3 && strncmp("]]>", &x->na[x->ln-3], 3) == 0){
+			x->na[x->ln-3] = '\0';
+			break;
+		}
+	}
+	return x->na;	/* addchara already terminated it; input ending early keeps every byte read */
+}
+
+/*
+ * checkcdata: consume 7 bytes; if they are "[CDATA[", read the section into x->na, set
+ * ev/nev to CDATA/TEXT and return 1.  Otherwise return 0 (the 7 bytes stay consumed).
+ */
+int
+checkcdata(xmlpull *x)
+{
+	char head[7];
+	int i;
+
+	for(i = 0; i < 7; i++)
+		head[i] = getchara(x);
+	if(strncmp("[CDATA[", head, 7) != 0)
+		return 0;
+	x->ev = CDATA;
+	x->nev = TEXT;
+	x->na = nil;	/* readcdata appends to x->na, so start from an empty buffer */
+	x->na = readcdata(x);
+	return 1;
+}
+
+/*
+ * nextxmlpull: parse the construct announced by x->nev.  The event is left in
+ * x->ev, its text in x->na (x->va for a value); nil once past END_DOCUMENT.
+ */
+xmlpull *
+nextxmlpull(xmlpull *x)
+{
+	char g;
+	int i;
+
+	free(x->va);
+	if(x->ev == START_TAG){
+		free(x->lm);
+		x->lm = x->na;	/* kept: "/>" after attributes reports it as END_TAG */
+		x->la = x->ln;
+	} else
+		free(x->na);
+
+	x->na = x->va = nil;
+	x->ln = x->lv = 0;
+
+	switch(x->nev){
+	case START_DOCUMENT:
+		if((x->na = readuntil(x, x->na, &x->ln, '<', 0)) == nil)
+			x->nev = END_DOCUMENT;
+		else
+			x->nev = START_TAG;
+		x->ev = START_DOCUMENT;
+		break;
+	case START_TAG:
+		g = getchara(x);
+		if(g == '/')
+			x->ev = END_TAG;
+		else {
+			x->na = addchara(x->na, &x->ln, g);
+			x->ev = START_TAG;
+		}
+		if(readname(x) == nil){
+			x->nev = END_DOCUMENT;
+			break;
+		}
+		if(!strncmp(x->na, "!--", 3)){
+			x->na[x->ln - 1] = '\0';
+			x->nev = TEXT_C;
+			return x;
+		}
+		if(x->ev == START_TAG && !strncmp(x->na, "![CDATA[", 8)){
+			/* matched after readname, so "<!--" and "<!DOCTYPE" lose no input */
+			for(i = 8; i <= x->ln; i++)
+				x->na[i - 8] = x->na[i];
+			x->ln -= 8;
+			x->ev = CDATA;
+			x->nev = TEXT;
+			while(x->ln < 3 || strncmp(&x->na[x->ln - 3], "]]>", 3) != 0){
+				if((g = getchara(x)) == 0)
+					return x;	/* unterminated: getchara set END_DOCUMENT */
+				x->na = addchara(x->na, &x->ln, g);
+			}
+			x->na[x->ln - 3] = '\0';	/* cut the "]]>" off */
+			return x;
+		}
+		if(x->ev == END_TAG){
+			x->na[x->ln - 1] = '\0';
+			x->nev = TEXT;
+			break;
+		}
+		switch(x->na[x->ln - 1]){
+		case '/':
+			getchara(x);	/* eat the '>' of "/>" */
+			x->ev = START_END_TAG;
+			x->nev = TEXT;
+			x->na[x->ln - 1] = '\0';
+			break;
+		case '>':
+			x->nev = TEXT;
+			x->na[x->ln - 1] = '\0';
+			break;
+		default:
+			x->na[x->ln - 1] = '\0';
+			x->nev = ATTR;
+		}
+		break;
+	case TEXT_C:
+	case TEXT:
+		g = x->nev == TEXT_C ? '>' : '<';	/* a comment runs to '>', text to '<' */
+		if((x->na = readuntil(x, x->na, &x->ln, g, 0)) == nil){
+			x->ev = END_DOCUMENT;
+			x->nev = END_DOCUMENT + 1;
+		} else {
+			if(x->nev == TEXT_C)
+				x->nev = TEXT;
+			else
+				x->nev = START_TAG;
+			x->ev = TEXT;
+		}
+		break;
+	case ATTR:
+		if(parseattrib(x) == nil){
+			if(x->na != nil && x->na[x->ln - 1] == '/'){
+				free(x->na);	/* "/>": the saved tag name becomes the END_TAG */
+				x->na = x->lm;
+				x->ln = x->la;
+				x->lm = nil;
+				x->la = 0;
+				getchara(x);	/* eat the '>' */
+				x->ev = END_TAG;
+				x->nev = TEXT;
+				return x;
+			}
+			if(x->na != nil)	/* nil: input ended before any attribute byte */
+				x->na[x->ln - 1] = '\0';
+			x->ev = ATTR;
+			x->nev = TEXT;
+			return nextxmlpull(x);
+		}
+		x->ev = x->nev = ATTR;
+		break;
+	case END_DOCUMENT:
+		x->ev = END_DOCUMENT;
+		x->nev = END_DOCUMENT + 1;
+		break;
+	default:
+		return nil;
+	}
+
+	return x;
+}
+
+/*
+ * writexmlpull: write the construct selected by x->nev to x->fd, with its text
+ * taken from x->na (and x->va for ATTR).  Returns x on success, otherwise nil;
+ * END_DOCUMENT closes x->fd and returns nil as well.
+ */
+xmlpull *
+writexmlpull(xmlpull *x)
+{
+	char *out;
+	int len;
+	long n;
+
+	switch(x->nev){
+	case START_DOCUMENT:
+		if(write(x->fd, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 39) < 0)
+			break;
+		return x;
+	case START_TAG:
+		if(x->na == nil)
+			break;
+		len = x->ln + 3;
+		out = reallocp(nil, len, 2);
+		snprint(out, len, "<%s ", x->na);
+		n = write(x->fd, out, strlen(out));
+		free(out);
+		if(n < 0)
+			break;
+		return x;
+	case START_END_TAG:
+		if(x->na == nil)
+			break;
+		len = x->ln + 4;
+		out = reallocp(nil, len, 2);
+		snprint(out, len, "<%s/>", x->na);
+		n = write(x->fd, out, strlen(out));
+		free(out);
+		if(n < 0)
+			break;
+		return x;
+	case TEXT:
+		if(x->na == nil)
+			break;
+		if(write(x->fd, x->na, x->ln) < 0)
+			break;
+		return x;
+	case TEXT_C:
+		if(x->na == nil)
+			break;
+		len = x->ln + 5;
+		out = reallocp(nil, len, 2);
+		snprint(out, len, "%s -->", x->na);
+		n = write(x->fd, out, strlen(out));
+		free(out);
+		if(n < 0)
+			break;
+		return x;
+	case ATTR:
+		if(x->na == nil)
+			break;
+		len = x->ln + x->lv + 5;
+		out = reallocp(nil, len, 2);
+		snprint(out, len, "%s=\"%s\" ", x->na, (x->va == nil) ? "" : x->va);
+		n = write(x->fd, out, strlen(out));
+		free(out);
+		if(n < 0)
+			break;
+		return x;
+	case END_TAG:
+		if(x->na == nil)
+			break;
+		len = x->ln + 4;
+		out = reallocp(nil, len, 2);
+		snprint(out, len, "</%s>", x->na);
+		n = write(x->fd, out, strlen(out));
+		free(out);
+		if(n < 0)
+			break;
+		return x;
+	case END_TAG_S:
+		if(write(x->fd, "/>", 2) < 0)
+			break;
+		return x;
+	case END_TAG_N:
+		if(write(x->fd, ">", 1) < 0)
+			break;
+		return x;
+	case END_DOCUMENT:
+		close(x->fd);
+		break;
+	default:
+		break;
+	}
+
+	return nil;
+}
+
--- a/xmlpull.h
+++ b/xmlpull.h
@@ -3,9 +3,11 @@
* by 20h
*/
+/*
#ifdef nil
#pragma lib "libxmlpull.a"
#endif
+*/
#ifndef XMLPULL_H
#define XMLPULL_H
@@ -21,6 +23,7 @@
START_DOCUMENT = 0x0,
START_TAG,
START_END_TAG,
+ CDATA,
TEXT,
TEXT_C,
ATTR,