ref: 70dbd6be889621dc1b07b732d757c3dddd6dc164
parent: 561b01eedfb87965b2ff1f7e32d8270ad98cf59e
author: uriel <uriel@vm41.cat-v.org>
date: Tue Jul 28 10:49:52 EDT 2009
Add yiyus md2html.awk script as an alternative to markdown.pl
--- /dev/null
+++ b/bin/contrib/md2html.awk
@@ -1,0 +1,427 @@
+#!/bin/awk -f
+#
+# by: Jesus Galan (yiyus) 2009
+#
+# Usage: md2html.awk file.md > file.html
+# See: http://4l77.com/src/md2html.awk
+
+# eschtml(t): return t with the HTML-significant characters "&" and "<"
+# replaced by the entities "&amp;" and "&lt;".
+# "&" is handled first so that the ampersand of a freshly produced "&lt;"
+# is not escaped a second time.
+function eschtml(t) {
+    # In a gsub() replacement a bare "&" stands for the matched text, so the
+    # literal ampersand that starts each entity is written as "\\&".
+    gsub("&", "\\&amp;", t);
+    gsub("<", "\\&lt;", t);
+    return t;
+}
+
+# oprint(t): emit one line of output.
+# While unresolved forward references are pending (nr is non-zero) the line
+# is appended to the otext buffer instead of being printed, so that subref()
+# can still patch placeholders in it.
+function oprint(t) {
+    if(nr != 0) {
+        otext = otext "\n" t;
+        return;
+    }
+    print t;
+}
+
+# subref(id): called once the definition of reference `id` is known.
+# Replaces pending "<<id" placeholders in the otext buffer with ref[id],
+# decrementing nr for each one, and flushes the buffer when no unresolved
+# reference is left.
+#
+# The substitution is done with index()/substr() rather than sub():
+#   - `id` is arbitrary user text, not a regular expression;
+#   - ref[id] may contain "&" (e.g. from "&amp;"), which sub() would expand
+#     to the matched text.
+# Placeholders are always written inside a quoted attribute (see nextil()),
+# so the closing quote is made part of the search string; this keeps "<<a"
+# from matching inside "<<ab".
+# mark and pos are locals.
+function subref(id,    mark, pos) {
+    mark = "<<" id "\"";
+    while(nr > 0 && (pos = index(otext, mark)) > 0) {
+        # Keep the closing quote: resume at the last character of mark.
+        otext = substr(otext, 1, pos - 1) ref[id] substr(otext, pos + length(mark) - 1);
+        nr--;
+    }
+    if(nr == 0 && otext) {
+        print otext;
+        otext = "";
+    }
+}
+
+# nextil(t): convert the inline Markdown in t to HTML and return the result.
+#
+# Finds the first character of t that can start an inline construct, keeps
+# the text before it, handles that construct and recurses on the remainder.
+#
+# Global state shared across the recursion (reset by inline()):
+#   ilcode   - currently inside a code span
+#   ilcode2  - toggled by a double backtick; while set, single backticks
+#              are copied literally
+#   stag/ns  - stack of the emphasis markers that are currently open
+# Also reads ref[]; a reference that is not defined yet is emitted as the
+# placeholder "<<id" and counted in nr so the output is buffered until
+# subref() can fill it in.
+#
+# The names after t are locals (extra parameters are awk's way to declare
+# them); callers pass only t.  Keeping them local stops a nested call from
+# overwriting values the caller still needs.
+function nextil(t,    t1, tag, t2, url, linktext, alt, title, id, r, pt2, ntag, inner) {
+    if(!match(t, /[`<&\[*_\\-]|(\!\[)/))
+        return t;
+    t1 = substr(t, 1, RSTART - 1);
+    tag = substr(t, RSTART, RLENGTH);
+    t2 = substr(t, RSTART + RLENGTH);
+    # Inside a code span everything but a backtick is literal text.
+    if(ilcode && tag != "`")
+        return eschtml(t1 tag) nextil(t2);
+    # Backslash escaping
+    if(tag == "\\"){
+        if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){
+            tag = substr(t2, 1, 1);
+            t2 = substr(t2, 2);
+        }
+        return t1 tag nextil(t2);
+    }
+    # Dashes: "--" becomes an em dash (written as an entity so the output
+    # does not depend on the character encoding of the page).
+    if(tag == "-"){
+        if(sub(/^-/, "", t2))
+            tag = "&mdash;";
+        return t1 tag nextil(t2);
+    }
+    # Inline Code
+    if(tag == "`"){
+        if(sub(/^`/, "", t2)){
+            # A double backtick with no matching closer is a quotation mark.
+            if(!match(t2, /``/))
+                return t1 "&#8221;" nextil(t2);
+            ilcode2 = !ilcode2;
+        }
+        else if(ilcode2)
+            return t1 tag nextil(t2);
+        tag = "<code>";
+        if(ilcode){
+            t1 = eschtml(t1);
+            tag = "</code>";
+        }
+        ilcode = !ilcode;
+        return t1 tag nextil(t2);
+    }
+    if(tag == "<"){
+        # Autolinks.  ">" is excluded from the address so that inline tags
+        # such as <b>x.y</b> are not mistaken for an autolink.
+        if(match(t2, /^[^ >]+[\.@][^ >]+>/)){
+            url = eschtml(substr(t2, 1, RLENGTH - 1));
+            t2 = substr(t2, RLENGTH + 1);
+            linktext = url;
+            if(match(url, /@/) && !match(url, /^mailto:/))
+                url = "mailto:" url;
+            return t1 "<a href=\"" url "\">" linktext "</a>" nextil(t2);
+        }
+        # Html tags are passed through untouched.
+        if(match(t2, /^[A-Za-z\/!][^>]*>/)){
+            tag = tag substr(t2, RSTART, RLENGTH);
+            t2 = substr(t2, RLENGTH + 1);
+            return t1 tag nextil(t2);
+        }
+        # A lone "<" is text and must be escaped.
+        return t1 "&lt;" nextil(t2);
+    }
+    # Html special entities
+    if(tag == "&"){
+        if(match(t2, /^#?[A-Za-z0-9]+;/)){
+            tag = tag substr(t2, RSTART, RLENGTH);
+            t2 = substr(t2, RLENGTH + 1);
+            return t1 tag nextil(t2);
+        }
+        # A lone "&" is text and must be escaped.
+        return t1 "&amp;" nextil(t2);
+    }
+    # Images
+    if(tag == "!["){
+        if(!match(t2, /(\[.*\])|(\(.*\))/))
+            return t1 tag nextil(t2);
+        match(t2, /^[^\]]*/);
+        alt = substr(t2, 1, RLENGTH);
+        t2 = substr(t2, RLENGTH + 2);
+        if(match(t2, /^\(/)){
+            # Inline
+            sub(/^\(/, "", t2);
+            match(t2, /^[^\)]+/);
+            url = substr(t2, 1, RLENGTH);
+            t2 = substr(t2, RLENGTH + 2);
+            title = "";
+            if(match(url, /[ ]+\".*\"[ ]*$/)) {
+                title = substr(url, RSTART, RLENGTH);
+                url = substr(url, 1, RSTART - 1);
+                match(title, /\".*\"/);
+                title = " title=\"" eschtml(substr(title, RSTART + 1, RLENGTH - 2)) "\"";
+            }
+            # Strip optional <...> around the URL before escaping it;
+            # once "<" has become "&lt;" the pattern could no longer match.
+            if(match(url, /^<.*>$/))
+                url = substr(url, 2, RLENGTH - 2);
+            url = eschtml(url);
+            return t1 "<img src=\"" url "\" alt=\"" alt "\"" title " />" nextil(t2);
+        }
+        else{
+            # Referenced
+            sub(/^ ?\[/, "", t2);
+            id = alt;
+            if(match(t2, /^[^\]]+/))
+                id = substr(t2, 1, RLENGTH);
+            t2 = substr(t2, RLENGTH + 2);
+            if(ref[id])
+                r = ref[id];
+            else{
+                r = "<<" id;
+                nr++;
+            }
+            return t1 "<img src=\"" r "\" alt=\"" alt "\" />" nextil(t2);
+        }
+    }
+    # Links
+    if(tag == "["){
+        if(!match(t2, /(\[.*\])|(\(.*\))/))
+            return t1 tag nextil(t2);
+        match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/);
+        linktext = substr(t2, 1, RLENGTH);
+        t2 = substr(t2, RLENGTH + 2);
+        if(match(t2, /^\(/)){
+            # Inline
+            match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/);
+            url = substr(t2, 2, RLENGTH - 1);
+            pt2 = substr(t2, RLENGTH + 2);
+            title = "";
+            if(match(url, /[ ]+\".*\"[ ]*$/)) {
+                title = substr(url, RSTART, RLENGTH);
+                url = substr(url, 1, RSTART - 1);
+                match(title, /\".*\"/);
+                title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\"";
+            }
+            if(match(url, /^<.*>$/))
+                url = substr(url, 2, RLENGTH - 2);
+            url = eschtml(url);
+            # Convert the link text first, then the rest of the line, so the
+            # shared scanner state is updated in reading order.
+            inner = nextil(linktext);
+            return t1 "<a href=\"" url "\"" title ">" inner "</a>" nextil(pt2);
+        }
+        else{
+            # Referenced
+            sub(/^ ?\[/, "", t2);
+            id = linktext;
+            if(match(t2, /^[^\]]+/))
+                id = substr(t2, 1, RLENGTH);
+            t2 = substr(t2, RLENGTH + 2);
+            if(ref[id])
+                r = ref[id];
+            else{
+                r = "<<" id;
+                nr++;
+            }
+            pt2 = t2;
+            inner = nextil(linktext);
+            # The anchor is an ordinary opening tag, not a self-closing one.
+            return t1 "<a href=\"" r "\">" inner "</a>" nextil(pt2);
+        }
+    }
+    # Emphasis
+    if(tag == "*" || tag == "_"){
+        ntag = tag;
+        # Plain string comparison instead of a dynamic regex built from the
+        # marker ("^*" is not a portable regular expression).
+        if(substr(t2, 1, 1) == tag){
+            t2 = substr(t2, 2);
+            if(stag[ns] == tag && substr(t2, 1, 1) == tag)
+                t2 = tag t2;
+            else
+                ntag = tag tag;
+        }
+        # A marker with a space on both sides is literal text.
+        if(match(t1, / $/) && match(t2, /^ /))
+            return t1 ntag nextil(t2);
+        tag = (length(ntag) == 2) ? "strong" : "em";
+        if(stag[ns] == ntag){
+            tag = "/" tag;
+            ns--;
+        }
+        else
+            stag[++ns] = ntag;
+        return t1 "<" tag ">" nextil(t2);
+    }
+    # Every character the first match() can return is handled above; keep
+    # the text intact should that ever change.
+    return t1 tag nextil(t2);
+}
+
+# inline(t): convert the inline markup of one block of text.
+# Clears the scanner state that nextil() keeps in globals (code-span flags
+# and the emphasis stack depth) and returns nextil(t).
+function inline(t) {
+    ns = 0;
+    ilcode2 = 0;
+    ilcode = 0;
+    return nextil(t);
+}
+
+# printp(tag): flush the accumulated global `text`.
+# If text holds anything besides spaces it is run through inline() and
+# passed to oprint(), wrapped in <tag>...</tag> unless tag is empty.
+# text is always empty afterwards.
+function printp(tag) {
+    if(match(text, /^[ ]*$/)) {
+        text = "";
+        return;
+    }
+    text = inline(text);
+    if(tag == "")
+        oprint(text);
+    else
+        oprint("<" tag ">" text "</" tag ">");
+    text = "";
+}
+
+# Initial state of the block-level parser.
+BEGIN {
+    # Pending text and the tag printp() will wrap it in.
+    text = "";
+    par = "p";
+    # Output buffer and the number of unresolved references.
+    otext = "";
+    nr = 0;
+    # Block state flags and the current nesting depth.
+    nl = 0;
+    html = 0;
+    hr = 0;
+    code = 0;
+    blank = 0;
+}
+
+# References
+# A definition line has the form:   [id]: url "optional title"
+# It stores ref[id] and resolves placeholders that were waiting for it.
+# ref[id] is kept as   url" title="text   (without the outer quotes), so it
+# can be dropped between the quotes of an href/src attribute as is.
+!code && /^ *\[[^\]]*\]:[ ]+/ {
+    sub(/^ *\[/, "");
+    match($0, /\]/);
+    id = substr($0, 1, RSTART - 1);
+    # Remove "id]:" by position.  The id is user text; using it as a regular
+    # expression fails for ids that contain metacharacters such as "+" or "(".
+    $0 = substr($0, RSTART + 1);
+    sub(/^:[ ]+/, "");
+    title = "";
+    if(match($0, /\".*\"$/))
+        title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2);
+    sub(/[ ]+\".*\"$/, "");
+    url = eschtml($0);
+    ref[id] = url title;
+
+    subref(id);
+    next;
+}
+
+# html
+# Start of a raw HTML block: outside html mode, a line that begins with one
+# of the listed block-level tags (or "<!--") is copied through unchanged.
+#  - If a code block is open its closing markup is printed (the `code` flag
+#    itself is not reset here).
+#  - While there is no pending text, enclosing blockquotes are closed.
+#  - htag is the matched tag name (the match minus its leading "<").
+#  - html is set to 1 unless this same line already contains </htag>, or is
+#    an <hr> tag or a comment terminated at the end of the line.
+#  - hr is set when an unterminated <hr tag has been opened.
+# NOTE(review): the second copy of the tag list continues on a line that
+# starts with whitespace inside the regex literal - confirm it still matches
+# "isindex" as intended.
+!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
+isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ {+ if(code)
+ oprint("</pre></code>");+ for(; !text && block[nl] == "blockquote"; nl--)
+ oprint("</blockquote>");+ match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
+ isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/);
+ htag = substr($0, 2, RLENGTH - 1);
+ if(!match($0, "(<\\/" htag ">)|((^<hr ?\\/?)|(--)>$)"))
+ html = 1;
+ if(html && match($0, /^<hr/))
+ hr = 1;
+ oprint($0);
+ next;
+}
+
+# End of a raw HTML block: in html mode, a line that ends with ">" and
+# either starts with the closing form of one of the block-level tags or
+# ends with "-->", or - while an <hr tag is still open - any line ending in
+# ">".  The line is copied through and html mode (and hr) is switched off.
+html && (/(^<\/(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
+isindex|menu|noframes|noscript|ol|p|pre|table|ul).*)|(--)>$/ ||
+(hr && />$/)) {+ html = 0;
+ hr = 0;
+ oprint($0);
+ next;
+}
+
+# Inside a raw HTML block every line is copied to the output untouched.
+html {
+    oprint($0);
+    next;
+}
+
+# List and quote blocks
+
+# Remove indentation
+# Walk the currently open blocks from the outermost one and strip from the
+# line the prefix each of them requires: one tab or four spaces for a list
+# level, ">" plus an optional space for a blockquote level.
+# nnl ends up as the number of open levels this line still belongs to.
+{
+    for(nnl = 0; nnl < nl; nnl++) {
+        if(match(block[nnl + 1], /[ou]l/)) {
+            # Markdown nests list content by a tab or four spaces; a single
+            # space must not count as a level.
+            if(!sub(/^(\t|    )/, ""))
+                break;
+        }
+        else if(block[nnl + 1] == "blockquote") {
+            if(!sub(/^> ?/, ""))
+                break;
+        }
+    }
+}
+# Lazy continuation: a line that lost some nesting prefix but directly
+# follows pending text, and does not start a list item, stays in the
+# current block.
+nnl < nl && !blank && text && $0 !~ /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ {
+    nnl = nl;
+}
+# Quote blocks
+# Every remaining "> " prefix opens one more blockquote level.
+{
+    while(sub(/^> /, "")) {
+        nnl++;
+        nblock[nnl] = "blockquote";
+    }
+}
+# Horizontal rules
+# hr is cleared for every line, then set when the line consists of three or
+# more "-", "*" or "_" characters (optionally spaced) and either follows a
+# blank line or there is neither pending text nor an open code block.
+{
+    hr = 0;
+}
+(blank || !(text || code)) && /^ ? ? ?([-*_][ ]*)([-*_][ ]*)([-*_][ ]*)+$/ {
+    if(code) {
+        oprint("</pre></code>");
+        code = 0;
+    }
+    hr = 1;
+    nnl = 0;
+    blank = 0;
+}
+# List items
+# An empty line while a list is the innermost block is only remembered.
+block[nl] ~ /[ou]l/ && /^$/ {
+    blank = 1;
+    next;
+}
+# newli records whether this line starts a list item.
+{
+    newli = 0;
+}
+# Unordered item: strip the bullet and request one more "ul" level.
+!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ {
+    sub(/^ ? ? ?[*+-]( +| )/, "");
+    nblock[++nnl] = "ul";
+    newli = 1;
+}
+# Ordered item: strip the number and request one more "ol" level.
+(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +| )/ {
+    sub(/^ ? ? ?([0-9]+\.)+( +| )/, "");
+    nblock[++nnl] = "ol";
+    newli = 1;
+}
+# A new list item: flush the text of the previous item.  When the item
+# continues the list that is already open, close the previous <li> and
+# open the next one.
+newli {
+    if(blank && nnl == nl && !par)
+        par = "p";
+    blank = 0;
+    printp(par);
+    if(nnl == nl && block[nl] == nblock[nl])
+        oprint("</li><li>");
+}
+# First non-empty line after a remembered blank line: flush what came
+# before it as its own paragraph and start a fresh one.
+blank && $0 !~ /^$/ {
+    if(match(block[nnl], /[ou]l/) && !par)
+        par = "p";
+    printp(par);
+    par = "p";
+    blank = 0;
+}
+
+# Close old blocks and open new ones
+# When the nesting of this line differs from the open blocks, end any code
+# block, flush the pending text and choose the paragraph tag for what
+# follows: none directly inside a list, "p" otherwise.
+nnl != nl || nblock[nl] != block[nl] {
+    if(code) {
+        oprint("</pre></code>");
+        code = 0;
+    }
+    printp(par);
+    if(nnl > nl)
+        b = nblock[nnl];
+    else
+        b = block[nnl];
+    if(match(b, /[ou]l/))
+        par = "";
+    else
+        par = "p";
+}
+# Close blocks: runs when the line belongs to fewer levels than are open, or
+# to the same number but the innermost level changed kind.  Prints the
+# closing tags from the innermost block outwards (a list also closes its
+# last <li>) and lowers nl accordingly.
+# NOTE(review): pblock is not assigned anywhere in the visible script, so
+# pblock[nl] is always empty in the loop condition; nblock may have been
+# intended.  The levels closed here are reopened by the rule that follows,
+# so confirm the resulting output before changing it.
+nnl < nl || (nnl == nl && nblock[nl] != block[nl]) {+ for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){+ if(match(block[nl], /[ou]l/))
+ oprint("</li>");+ oprint("</" block[nl] ">");+ }
+}
+# Open blocks: bring the open levels up to what this line asked for,
+# printing each opening tag (a list also opens its first <li>).
+nnl > nl {
+    while(nl < nnl) {
+        nl++;
+        block[nl] = nblock[nl];
+        oprint("<" block[nl] ">");
+        if(match(block[nl], /[ou]l/))
+            oprint("<li>");
+    }
+}
+# A horizontal rule detected earlier is printed once the blocks are settled.
+hr {
+    oprint("<hr>");
+    next;
+}
+
+# Code blocks
+# Lines indented by a tab or four spaces, with no paragraph text pending,
+# form a code block.
+#
+# Empty lines inside the block are counted in cblank and printed only when
+# another code line follows, so that empty lines after the block are not
+# included in it.  A dedicated counter is needed because `blank` is cleared
+# by the earlier "blank && non-empty line" rule before this point is
+# reached; the previous test of the misspelled, never-set `blanK` dropped
+# every empty line of a code block.
+code && /^$/ {
+    cblank++;
+    blank = 1;
+    next;
+}
+!text && sub(/^(\t|    )/, "") {
+    if(code) {
+        # Empty lines seen since the previous code line belong to the block.
+        for(; cblank > 0; cblank--)
+            oprint("");
+    }
+    else
+        oprint("<code><pre>");
+    # Starting a new block discards any count left over from one that was
+    # closed by another rule.
+    cblank = 0;
+    blank = 0;
+    code = 1;
+    $0 = eschtml($0);
+    oprint($0);
+    next;
+}
+# Any other line ends the code block.
+code {
+    oprint("</pre></code>");
+    code = 0;
+    cblank = 0;
+}
+
+# Setext-style headers
+# Pending text underlined with "=" is a first-level header, underlined with
+# "-" a second-level header.
+text && /^=+$/ {
+    printp("h1");
+    next;
+}
+text && /^-+$/ {
+    printp("h2");
+    next;
+}
+
+# Atx-style headers
+# Count up to six leading "#" characters, removing each one (with the spaces
+# after it) and one trailing "#" per level; the count selects h1..h6 as the
+# tag for the text that follows.
+/^#+/ && (!newli || par=="p" || /^##/) {
+    n = 0;
+    while(n < 6 && sub(/^# */, "")) {
+        sub(/#$/, "");
+        n++;
+    }
+    par = "h" n;
+}
+
+# Paragraph
+# An empty line ends the current paragraph.
+/^$/ {
+    printp(par);
+    par = "p";
+    next;
+}
+
+# Add text
+# Anything else is appended to the pending text, separated by one space.
+# The line is always concatenated with a string so text stays a string.
+{
+    if(text)
+        text = text " " $0;
+    else
+        text = "" $0;
+}
+
+# End of input: close whatever is still open, drop the placeholders of
+# references that were never defined, and print the remaining buffer.
+END {
+    if(code) {
+        oprint("</pre></code>");
+        code = 0;
+    }
+    printp(par);
+    while(nl > 0) {
+        if(match(block[nl], /[ou]l/))
+            oprint("</li>");
+        oprint("</" block[nl] ">");
+        nl--;
+    }
+    gsub(/<<[^\"]*/, "", otext);
+    print(otext);
+}
--
⑨