shithub: xml-9atom

Download patch

ref: 47a288683dbdc55c5df9b9f65db9909de6021af0
parent: ce4a8027322c53b2832f221e2956c90dcd65fd1a
author: sirjofri <sirjofri@sirjofri.de>
date: Wed Jul 24 11:16:58 EDT 2024

adds namespace support

--- a/README
+++ b/README
@@ -27,6 +27,14 @@
 This will remove all the installed files.
 
 
+Libxml
+
+This version of libxml adds support for the following features.
+To get a good overview of how these features work, check the man page.
+
+- Namespace support for Elems and Attrs, which supersedes Fstripnamespace
+
+
 Libxpath
 
 currently supported rules:
--- a/libxml/mkfile
+++ b/libxml/mkfile
@@ -10,6 +10,7 @@
 	xmlparse.$O\
 	xmlprint.$O\
 	xmlnew.$O\
+	xmlns.$O\
 	xmllook.$O\
 	xmlvalue.$O\
 	heap.$O\
--- a/libxml/xmlattr.c
+++ b/libxml/xmlattr.c
@@ -6,8 +6,9 @@
 xmlattr(Xml *xp, Attr **root, Elem *parent, char *name, char *value)
 {
 	Attr *ap, *t;
+	char *s;
+	Ns *n;
 
-	USED(xp);
 	if((ap = xmlcalloc(xp, sizeof(Attr), 1)) == nil)
 		sysfatal("no memory - %r\n");
 	if(*root == nil){
@@ -20,9 +21,28 @@
 	}
 	ap->parent = parent;
 
-	if(name)
-		if((ap->name = xmlstrdup(xp, name, 1)) == nil)
+	if (name) {
+		n = nil;
+		if (strncmp(name, "xmlns:", 6) == 0) {
+			n = xmlfindns(xp, name+6);
+		} else
+		if (strcmp(name, "xmlns") == 0) {
+			n = xmlfindns(xp, nil);
+		}
+		if (n && value)
+			n->decl = xmlstrdup(xp, value, 0);
+	}
+
+	if(name){
+		s = strchr(name, ':');
+		if (s) {
+			ap->ns = xmlfindns(xp, name);
+			s++;
+		} else
+			s = name;
+		if((ap->name = xmlstrdup(xp, s, 1)) == nil)
 			sysfatal("no memory - %r\n");
+	}
 
 	if(value)
 		if((ap->value = xmlstrdup(xp, value, 0)) == nil)
--- a/libxml/xmlelem.c
+++ b/libxml/xmlelem.c
@@ -6,6 +6,8 @@
 xmlelem(Xml *xp, Elem **root, Elem *parent, char *name)
 {
 	Elem *ep, *t;
+	Ns *ns;
+	char *s;
 
 	USED(xp);
 	if((ep = xmlcalloc(xp, sizeof(Elem), 1)) == nil)
@@ -18,10 +20,18 @@
 			continue;
 		t->next = ep;
 	}
+	ns = strchr(name, ':') ? xmlfindns(xp, name) : xmlfindns(xp, nil);
+	ep->ns = ns;
 	ep->parent = parent;
-	if(name)
-		if((ep->name = xmlstrdup(xp, name, 1)) == nil)
+	if(name){
+		s = strchr(name, ':');
+		if (s)
+			s++;
+		else
+			s = name;
+		if((ep->name = xmlstrdup(xp, s, 1)) == nil)
 			sysfatal("no memory - %r\n");
+	}
 	return ep;
 }
 
--- /dev/null
+++ b/libxml/xmlns.c
@@ -1,0 +1,75 @@
+#include <u.h>
+#include <libc.h>
+#include "xml.h"
+
+/*
+ * Append a new namespace with the given name (prefix) and decl to the
+ * end of xml->ns and return it.  Either string may be nil; non-nil
+ * strings are copied with xmlstrdup.  Allocation failure is fatal,
+ * as in xmlattr and xmlelem.
+ */
+Ns*
+xmladdns(Xml *xml, char *name, char *decl)
+{
+	Ns *n, **tail;
+
+	/* one append path for both the empty and the non-empty list */
+	for (tail = &xml->ns; *tail; tail = &(*tail)->next)
+		continue;
+
+	if ((n = xmlcalloc(xml, sizeof(Ns), 1)) == nil)
+		sysfatal("no memory - %r\n");
+	*tail = n;
+
+	if (name)
+		if ((n->name = xmlstrdup(xml, name, 1)) == nil)
+			sysfatal("no memory - %r\n");
+	if (decl)
+		if ((n->decl = xmlstrdup(xml, decl, 0)) == nil)
+			sysfatal("no memory - %r\n");
+
+	if (xmldebug == 1)
+		fprint(2, "addns: %s = %s\n", name, decl);
+	return n;
+}
+
+/*
+ * Return the namespace whose prefix is the part of name before the
+ * first ':' (all of name if there is no ':'); a nil or empty prefix
+ * selects the unnamed root namespace.  A namespace that is not yet
+ * known is created with a nil decl via xmladdns and returned.
+ */
+Ns*
+xmlfindns(Xml *xml, char *name)
+{
+	Ns *n;
+	int i;
+	char *s;
+
+	i = 0;
+	if (name) {
+		s = strchr(name, ':');
+		i = s ? s - name : strlen(name);
+	}
+	if (xmldebug) {
+		if (i)
+			fprint(2, "search for xmlns %.*s (%d)\n", i, name, i);
+		else
+			fprint(2, "search for root xmlns\n");
+	}
+	for (n = xml->ns; n; n = n->next) {
+		if (n->name == nil && !i)
+			return n;
+		/* compare the whole stored prefix: "foo" must not match "foobar" */
+		if (n->name && i && strncmp(n->name, name, i) == 0 && n->name[i] == '\0')
+			return n;
+	}
+	if (!i)
+		return xmladdns(xml, nil, nil);
+	if ((s = mallocz(i+1, 1)) == nil)
+		sysfatal("no memory - %r\n");
+	strncpy(s, name, i);	/* buffer is zeroed and one byte longer, so s is terminated */
+	n = xmladdns(xml, s, nil);
+	free(s);
+	return n;
+}
--- a/libxml/xmlparse.c
+++ b/libxml/xmlparse.c
@@ -443,6 +443,8 @@
 	Lexbuf pcdata, *pc;
 	Elem *root, *ep;
 	int os, s, t, a;
+	char *str;
+	Ns *ns;
 
 	ap = nil;
 	ep = nil;
@@ -465,8 +467,6 @@
 				fprint(2, "%-3d %*.selem name='%s'\n", st->line, depth, "", lb->buf);
 			if(!isname1(lb->buf[0]))
 				failed(st, "'%s' is an illegal element name", lb->buf);
-			if(st->flags & Fstripnamespace)
-				stripns(lb->buf);
 			assert((ep = xmlelem(st->xml, &root, parent, lb->buf)) != nil);
 			ep->line = st->line;
 			break;
@@ -480,8 +480,6 @@
 				fprint(2, "%-3d %*.sattr name='%s'\n", st->line, depth, "", lb->buf);
 			if(!isname1(lb->buf[0]))
 				failed(st, "'%s' is an illegal attribute name", lb->buf);
-			if(st->flags & Fstripnamespace)
-				stripns(lb->buf);
 			assert((ap = xmlattr(st->xml, &(ep->attrs), ep, lb->buf, nil)) != nil);
 			break;
 		case Avalue:
@@ -488,6 +486,11 @@
 			assert(ep != nil);
 			assert(ap != nil);
 			ap->value = xmlstrdup(st->xml, lb->buf, 0);
+			if(ap->ns && ap->ns->name && strcmp(ap->ns->name, "xmlns") == 0){
+				ns = xmlfindns(st->xml, ap->name);
+				if(ns)
+					ns->decl = xmlstrdup(st->xml, lb->buf, 0);
+			}
 			ap = nil;
 			if(xmldebug == 1)
 				fprint(2, "%*.sattr value=%s\n", depth, "", lb->buf);
@@ -514,9 +517,17 @@
 			break;
 		case Acheck:
 			assert(ep != nil);
-			if(st->flags & Fstripnamespace)
-				stripns(lb->buf);
-			if(ep->name && strcmp(lb->buf, ep->name) != 0)
+			if(str = strchr(lb->buf, ':')){
+				ns = xmlfindns(st->xml, lb->buf);
+				if(ep->ns != ns){
+					failed(st, "</%s> found, but does not match namespace for <%s:%s> (re: line %d) - nesting error",
+						lb->buf, ep->ns->name, ep->name, ep->line);
+					break;
+				}
+				str++;
+			} else
+				str = lb->buf;
+			if(ep->name && strcmp(str, ep->name) != 0)
 				failed(st, "</%s> found, expecting match for <%s> (re: line %d) - nesting error",
 					lb->buf, ep->name, ep->line);
 			break;
--- a/libxml/xmlprint.c
+++ b/libxml/xmlprint.c
@@ -33,13 +33,22 @@
 _xmlprint(Biobuf *bp, Elem *ep, int in)
 {
 	Attr *ap;
+	char *ns;
 	enum {indent = 4};
 
 	for(; ep; ep = ep->next){
-		Bprint(bp, "%*s<%s", in, "", ep->name);
-	
+		ns = ep->ns ? ep->ns->name : nil;
+		Bprint(bp, "%*s<", in, "");
+		if (ns)
+			Bprint(bp, "%s:", ns);
+		Bprint(bp, "%s", ep->name);
+
 		for (ap = ep->attrs; ap; ap = ap->next){
-			Bprint(bp, " %s=\'", ap->name);
+			ns = ap->ns ? ap->ns->name : nil;
+			Bprint(bp, " ");
+			if (ns)
+				Bprint(bp, "%s:", ns);
+			Bprint(bp, "%s=\'", ap->name);
 			prval(bp, ap->value);
 			Bprint(bp, "\'");
 		}
--- a/xml
+++ b/xml
@@ -32,6 +32,7 @@
 struct Xml{
 	Elem *root;		/* root of tree */
 	char *doctype;		/* DOCTYPE structured comment, or nil */
+	Ns *ns;			/* list of all namespaces */
 	...
 };
 .PB
@@ -42,6 +43,7 @@
 	Attr *attrs;		/* linked list of atributes */
 	char *name;		/* element name */
 	char *pcdata;		/* pcdata following this element */
+	Ns *ns;			/* namespace */
 	int line;			/* Line number (for errors) */
 };
 .PB
@@ -48,10 +50,17 @@
 struct Attr {
 	Attr *next;		/* next atribute */
 	Elem *parent;		/* parent element */
-	char *name;		/* atributes name */
+	char *name;		/* attributes name */
 	char *value;		/* atributes value */
+	Ns	*ns;			/* namespace */
 };
 .PB
+struct Ns {
+	char *name;		/* name of namespace */
+	char *decl;		/* namespace declaration */
+	Ns *next;			/* next namespace */
+};
+.PB
 .PD 0
 .ta +\w'\fL      'u +\w'\fL    'u +6n +4n
 Attr*	xmlattr(Xml *xp, Attr **root, Elem *parent,
@@ -78,6 +87,10 @@
 void	xmlfree(Xml *xp)
 .PB
 void	xmlprint(Xml *xp, int fd)
+.PB
+Ns* 	xmlfindns(Xml *xp, char *s)
+.PB
+Ns* 	xmladdns(Xml *xp, char *s, char *v)
 .SH DESCRIPTION
 .PP
 .I Libxml
@@ -158,6 +171,11 @@
 writes the XML hierarchy rooted at \fIep\fR as text to the given
 file descriptor.
 .PP
+.I Xmlfindns
+and
+.I xmladdns
+look up a namespace by prefix, creating it if it does not exist, and append a new namespace to the given XML document, respectively.
+.PP
 .IR Xmlmalloc ,
 .IR xmlcalloc ,
 and
@@ -174,7 +192,9 @@
 .SH "SEE ALSO"
 .IR xb (1).
 .SH BUGS
-Namespaces should be handled properly.
+The current namespace implementation supersedes
+.I Fstripnamespace
+and is probably buggy.
 .PP
 A SAX model parser will probably be needed sometime (e.g. for Ebooks).
 .PP
--- a/xml.h
+++ b/xml.h
@@ -4,6 +4,7 @@
 typedef struct Xml Xml;
 typedef struct Attr Attr;
 typedef struct Elem Elem;
+typedef struct Ns Ns;
 
 typedef struct Xtree Xtree;
 typedef struct Xblock Xblock;
@@ -19,6 +20,7 @@
 struct Xml {
 	Elem	*root;			/* root of tree */
 	char	*doctype;		/* DOCTYPE structured comment, or nil */
+	Ns  	*ns;
 	struct {
 		Xtree	*root;
 		Xblock	*active;
@@ -33,6 +35,7 @@
 	Attr	*attrs;		/* linked list of atributes */
 	char	*name;			/* element name */
 	char	*pcdata;		/* pcdata following this element */
+	Ns  	*ns;
 	int	line;			/* Line number (for errors) */
 };
 
@@ -41,8 +44,15 @@
 	Elem	*parent;		/* parent element */
 	char	*name;			/* atributes name (nil for coments) */
 	char	*value;		/* atributes value */
+	Ns  	*ns;
 };
 
+struct Ns {
+	char	*name;			/* namespace name (prefix), nil for the root namespace */
+	char	*decl;			/* namespace declaration */
+	Ns  	*next;			/* next namespace */
+};
+
 extern int xmldebug;
 
 Attr*	xmlattr(Xml *, Attr **, Elem *, char *, char *);
@@ -59,3 +69,5 @@
 Xml*	xmlparse(int, int, int);
 void	xmlprint(Xml *, int);
 char*	xmlvalue(Elem *, char *);
+Ns* 	xmlfindns(Xml *, char *);
+Ns* 	xmladdns(Xml *, char *, char *);
--- a/xpath
+++ b/xpath
@@ -15,7 +15,7 @@
 #include <xpath.h>
 .PB
 enum {
-	Xelems = 1,
+	Xelem = 1,
 	Xstring = 2,
 	Xnum = 3,
 }