ref: 1247006cbb0111a31261a734ed0324482ffe06df
dir: /docs/doxygen2rst.py/
from __future__ import print_function
import re, sys, os, time, glob, errno, tempfile, binascii, subprocess, shutil
from lxml import etree
from optparse import OptionParser
import textwrap
import string
VERSION = '0.1'
__all__ = ['DoxyGen2RST']
LINE_BREAKER = "\n"
MAX_COLUMN = 80
def is_valid_uuid(uuid_string):
    uuid4hex = re.compile('[0-9a-f]{32}\Z', re.I)
    return uuid4hex.match(uuid_string) != None
def get_page(refid):
    fields = refid.split("_")
    if(is_valid_uuid(fields[-1][-32:])):
        return ["_".join(fields[0:-1]), fields[-1]]
    return [refid, None]
def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise
def _glob(path, *exts):
    path = os.path.join(path, "*") if os.path.isdir(path) else path + "*"
    return [f for files in [glob.glob(path + ext) for ext in exts] for f in files]
class DoxyGen2RST(object):
    """
    Customize the Doxygen XML output into RST format, then it can
    be translated into all formats with the unified user interface.
    The Doxygen output itself is too verbose and not hard to be
    organized for a good documentation.
    """
    def __init__(self,
                 src,
                 dst,
                 missing_filename = "missing.rst",
                 is_github = False,
                 enable_uml = True,
                 github_ext = ""):
        self.doxy_output_dir = os.path.join(src, "_doxygen", "xml")
        self.output_dir = dst
        self.rst_dir = src
        self.enable_uml = enable_uml
        mkdir_p(dst)
        self.is_github = is_github
        if(is_github):
            self.page_ext = github_ext
            self.anchor_prefix = "wiki-"
        else:
            self.anchor_prefix = ""
            self.page_ext = ".html"
        self.filter = ["*.rst", "*.rest"]
        self.re_doxy = "<doxygen2rst\s(\S*)=(\S*)>(.*?)</doxygen2rst>"
        self.index_root = etree.parse(os.path.join(self.doxy_output_dir, "index.xml")).getroot()
        self.references = {}
        self.missed_types_structs = {}
        self.name_refid_map = {}
        self.build_references()
        self.page_references = {}
        self.missing_filename = missing_filename
        self.temp_uml_path = os.path.join(tempfile.gettempdir(), "uml_" + binascii.b2a_hex(os.urandom(15)))
        if os.path.exists(self.temp_uml_path):
            shutil.rmtree(self.temp_uml_path)
        os.mkdir(self.temp_uml_path)
    def _find_ref_id(self, kind, name):
        #print("_find_ref_id, %s - %s" %(kind, name))
        if(kind == "function"):
            for comp in self.index_root.iter("member"):
                if(comp.attrib["kind"].lower() == kind.lower() and
                   comp.findtext("name").lower() == name.lower()):
                    return (comp.attrib["refid"])
            pass
        else:
            for comp in self.index_root.iter("compound"):
                if(comp.attrib["kind"].lower() == kind.lower() and
                   comp.findtext("name").lower() == name.lower()):
                   return comp.attrib["refid"]
        return None
    def strip_title_ref(self, text):
        table = string.maketrans("","")
        retstr = text.translate(table, string.punctuation)
        words = retstr.split()
        retstr = "-".join(words)
        return retstr.lower()
    def build_references(self):
        for file in _glob(self.rst_dir, *self.filter):
            filename = os.path.basename(file)
            fin = open(file,'r')
            content = fin.read()
            it = re.finditer(self.re_doxy, content, re.DOTALL)
            for m in it:
                ref_id = self._find_ref_id(m.groups()[0], m.groups()[1])
                if(ref_id is None):
                    #print("Reference is NOT found for: %s=%s" % (m.groups()[0], m.groups()[1]))
                    continue
                page_name = os.path.splitext(filename)[0]
                title_ref = self.strip_title_ref(m.groups()[2])
                self.references[ref_id] = [m.groups()[0], m.groups()[1], page_name, filename, title_ref]
                self.name_refid_map[m.groups()[1]] = ref_id
            fin.close()
        #print(self.references)
    def call_plantuml(self):
        if(not self.enable_uml):
            return
        java_bin = os.path.join(os.environ['JAVA_HOME'], "bin", "java")
        output_path = os.path.abspath(os.path.join(self.output_dir, "images"))
        cmds = ["\"" + java_bin + "\"", "-jar", "plantuml.jar", self.temp_uml_path + "/", "-o", output_path]
        print(" ".join(cmds))
        os.system(" ".join(cmds))
        shutil.rmtree(self.temp_uml_path)
    def _build_uml(self, uml_name, content):
        uml_path = os.path.join(self.temp_uml_path, uml_name + ".txt")
        fuml = open(uml_path, "w+")
        fuml.write("@startuml\n")
        fuml.write(content)
        fuml.write("\n@enduml\n")
        fuml.close()
        return ".. image:: images/" + uml_name + ".png" + LINE_BREAKER
    def _build(self, m):
        retstr = ""
        if(m.groups()[0] == "uml"):
            retstr = self._build_uml(m.groups()[1], m.groups()[2])
        elif(m.groups()[0] == "link"):
            link = m.groups()[1] + self.page_ext
            retstr = ("`%s <%s>`_" % (m.groups()[2], link))
        else:
            if(m.groups()[0] != "function"):
                retstr +=  self._build_title(m.groups()[2])
            retstr += self.convert_doxy(m.groups()[0], m.groups()[1])
        return retstr
    def generate(self):
        for file in _glob(self.rst_dir, *self.filter):
            filename = os.path.basename(file)
            fin = open(file,'r')
            input_txt = fin.read()
            fin.close()
            output_txt = re.sub(self.re_doxy, self._build, input_txt, 0, re.DOTALL)
            output_txt  += self._build_page_ref_notes()
            fout = open(os.path.join(self.output_dir, filename), 'w+')
            fout.write(output_txt)
            fout.close()
            #print("%s --- %s" %( file, os.path.join(self.output_dir, filename)))
        self._build_missed_types_and_structs()
        self.call_plantuml()
    def make_para_title(self, title,  indent = 4):
        retstr = LINE_BREAKER
        if(title):
            retstr += "".ljust(indent, " ") + "| **" + title + "**" +  LINE_BREAKER
        return retstr
    def _build_title(self, title, flag = '=', ref = None):
        retstr = LINE_BREAKER
        if(ref):
            retstr += ".. _ref-" + ref + ":" + LINE_BREAKER + LINE_BREAKER
        retstr += title + LINE_BREAKER
        retstr += "".ljust(20, flag) + LINE_BREAKER
        retstr += LINE_BREAKER
        return retstr
    def _build_ref(self, node):
        text = node.text.strip()
        retstr = ""
        target = '`' + text + '`'
        retstr += target + "_ "
        if target in self.page_references:
            reflink = self.page_references[target]
            print("Link already added: %s == %s" % (reflink[0], node.attrib["refid"]))
            assert(reflink[0] == node.attrib["refid"])
            pass
        else:
            self.page_references[target] = (node.attrib["refid"], node.attrib["kindref"], text)
        return retstr
    def _build_code_block(self, node):
        retstr = "::" + LINE_BREAKER + LINE_BREAKER
        for codeline in node.iter("codeline"):
            retstr += "  "
            for phrases in codeline.iter("highlight"):
                if(phrases.text):
                    retstr += phrases.text.strip()
                for child in phrases:
                    if(child.text):
                        retstr += child.text.strip()
                    if(child.tag == "sp"):
                        retstr += " "
                    if(child.tag == "ref" and child.text):
                        #escape the reference in the code block
                        retstr += "" # self._build_ref(child)
                    if(child.tail):
                        retstr += child.tail.strip()
            retstr += LINE_BREAKER
        return retstr
    def _build_itemlist(self, node):
        retstr = ""
        for para in node:
            if(para.tag != "para"):
                continue
            if(para.text):
                retstr += para.text.strip()
            for child in para:
                if(child.tag == "ref" and child.text):
                    retstr += self._build_ref(child)
                if(child.tail):
                    retstr += child.tail.strip()
        return retstr
    def _build_itemizedlist(self, node):
        retstr = LINE_BREAKER
        if(node == None):
            return ""
        for item in node:
            if(item.tag != "listitem"):
                continue
            retstr += "    - " + self._build_itemlist(item)
            retstr += LINE_BREAKER
        return retstr
    def _build_verbatim(self, node):
        retstr = LINE_BREAKER
        if(node.text):
            lines = node.text.splitlines()
            print(lines[0])
            m = re.search("{plantuml}\s(\S*)", lines[0])
            if(m):
                uml_name = "uml_" + m.groups()[0]
                retstr += self._build_uml(uml_name, "\n".join(lines[1:]))
            else:
                retstr += "::" + LINE_BREAKER + LINE_BREAKER
                retstr += node.text
        return retstr
    def _build_para(self, para):
        retstr = ""
        no_new_line = False
        if(para.text):
            retstr += textwrap.fill(para.text.strip(), MAX_COLUMN) + LINE_BREAKER + LINE_BREAKER
        for child in para:
            no_new_line = False
            if(child.tag == "simplesect"):
                for child_para in child:
                    if(child.attrib["kind"] == "return"):
                        return_str = self._build_para(child_para)
                        retstr += "".ljust(4, " ") + "| Return:" + LINE_BREAKER
                        for line in return_str.splitlines():
                            retstr += "".ljust(4, " ") + "| " + line + LINE_BREAKER
                    elif(child_para.tag == "title" and child_para.text):
                        lf.make_para_title(child_para.text.strip(), 4)
                    elif(child_para.tag == "para"): #for @see
                        retstr += self._build_para(child_para)
                    elif(child_para.text):
                        retstr += "".ljust(4, " ") + "| " + child_para.text.strip() + LINE_BREAKER
            if(child.tag == "preformatted"):
                retstr += "::" + LINE_BREAKER + LINE_BREAKER
                if(child.text):
                    for line in child.text.splitlines():
                        retstr += "  " + line + LINE_BREAKER
            if(child.tag == "ref" and child.text):
                retstr = retstr.rstrip('\n')
                retstr += " " + self._build_ref(child)
                no_new_line = True
            if(child.tag == "programlisting"):
                retstr += self._build_code_block(child)
            if(child.tag == "itemizedlist"):
                retstr += self._build_itemizedlist(child)
            if(child.tag == "verbatim"):
                retstr += self._build_verbatim(child)
            if(not no_new_line):
                retstr += LINE_BREAKER
            if(child.tail):
                retstr += textwrap.fill(child.tail.strip(), MAX_COLUMN) + LINE_BREAKER + LINE_BREAKER
        return retstr
    def get_text(self, node):
        retstr = ""
        if(node == None):
            return ""
        for para in node:
            if(para.tag != "para"):
                continue
            retstr += self._build_para(para)
        return retstr
    def _find_text_ref(self, node):
        retstr = ""
        if(node.text):
            retstr += node.text.strip()
        for child in node:
            if(child.tag == "ref"):
                retstr += " " + self._build_ref(child) + " "
            if(child.tail):
                retstr += child.tail.strip()
        return retstr
    def _build_row_breaker(self, columns):
        retstr = "+"
        for column in columns:
            retstr += "".ljust(column, "-") + "+"
        return retstr + LINE_BREAKER
    def _wrap_cell(self, text, length = 30):
        newlines = []
        for line in text.splitlines():
            newlines.extend(textwrap.wrap(line, length))
        return newlines
    def _build_row(self, row, columns):
        retstr = ""
        row_lines = []
        max_line = 0
        for i in range(3):
            row_lines.append(row[i].splitlines())
            if(max_line < len(row_lines[i])):
                max_line = len(row_lines[i])
        for i in range(max_line):
            for j in range(3):
                retstr += "|"
                if(len(row_lines[j]) > i):
                    retstr += row_lines[j][i]
                    retstr += "".ljust(columns[j] - len(row_lines[j][i]), " ")
                else:
                    retstr += "".ljust(columns[j], " ")
            retstr += "|" + LINE_BREAKER
        return retstr
    def _build_table(self, rows):
        retstr = ""
        columns = [0, 0, 0]
        for row in rows:
            for i in range(3):
                for rowline in row[i].splitlines():
                    if(columns[i] < len(rowline) + 2):
                        columns[i] = len(rowline) + 2
        #columns[0] = 40 if(columns[0] > 40) else columns[0]
        #columns[1] = 40 if(columns[1] > 40) else columns[1]
        #columns[2] = MAX_COLUMN - columns[0] - columns[1]
        retstr += self._build_row_breaker(columns)
        for row in rows:
            retstr += self._build_row(row, columns)
            retstr += self._build_row_breaker(columns)
        return retstr;
    def build_param_list(self, params, paramdescs):
        retstr = ""
        param_descriptions = []
        for desc in paramdescs:
            param_descriptions.append(desc)
        rows = []
        rows.append(("Name", "Type", "Descritpion"))
        for param in params:
            declname = param.findtext("declname")
            paramdesc = None
            for desc in param_descriptions:
                paramname = desc.findtext("parameternamelist/parametername")
                if(paramname.lower() == declname.lower()):
                    paramdesc = desc.find("parameterdescription")
                    break
            decltype = self._find_text_ref(param.find("type"))
            rows.append((declname, decltype, self.get_text(paramdesc)))
        if(len(rows) > 1):
            retstr += self._build_table(rows)
        return retstr
    def _build_enum(self, member):
        enum_id = member.attrib["id"]
        file, tag = get_page(enum_id)
        retstr = self._build_title(member.findtext("name"), ref = tag)
        detail_node = self.get_desc_node(member)
        if(detail_node is not None):
            retstr += LINE_BREAKER
            retstr += self.get_text(detail_node)
        rows = []
        rows.append(("Name", "Initializer", "Descritpion"))
        for enumvalue in member.iter("enumvalue"):
            name = enumvalue.findtext("name")
            initializer = enumvalue.findtext("initializer")
            if(not initializer):
                initializer = ""
            desc = self.get_text(enumvalue.find("briefdescription"))
            desc += self.get_text(enumvalue.find("detaileddescription"))
            if(not desc):
                desc = ""
            rows.append((name, initializer, desc))
        if(len(rows) > 1):
            retstr += self._build_table(rows)
        return retstr
    def _build_struct(self, node):
        retstr = ""
        detail_node = self.get_desc_node(node)
        if(detail_node is not None):
            retstr += self.get_text(detail_node) + LINE_BREAKER
        rows = []
        rows.append(("Name", "Type", "Descritpion"))
        for member in node.iter("memberdef"):
            if(member.attrib["kind"] == "variable"):
                name = member.findtext("name")
                type = self._find_text_ref(member.find("type"))
                desc = self.get_text(member.find("briefdescription"))
                desc += self.get_text(member.find("detaileddescription"))
                desc += self.get_text(member.find("inbodydescription"))
                if(not desc):
                    desc = ""
                rows.append((name, type, desc))
        if(len(rows) > 1):
            retstr += self._build_table(rows)
        return retstr
    def _build_class(self, node):
        retstr = ""
        for member in node.iter("memberdef"):
            if(member.attrib["kind"] == "function"):
                retstr += self.build_function(member)
        return retstr
    def get_desc_node(self, member):
        detail_node = member.find("detaileddescription")
        brief_node = member.find("briefdescription")
        detail_txt = ""
        if(detail_node == None and brief_node == None):
            return None
        if(detail_node is not None):
            detail_txt = detail_node.findtext("para")
        if(not detail_txt and brief_node != None):
            detail_txt = brief_node.findtext("para")
            detail_node = brief_node
        return detail_node
    def build_function(self, member):
        retstr = ""
        desc_node = self.get_desc_node(member)
        if(desc_node is None):
            return ""
        detail_txt = desc_node.findtext("para")
        if(not detail_txt or detail_txt.strip() == "{ignore}"):
            return ""
        func_id = member.attrib["id"]
        page_id, ref_id = get_page(func_id)
        retstr += self._build_title(member.findtext("name"), '-', ref = ref_id)
        retstr += self.get_text(desc_node)
        retstr += LINE_BREAKER
        detail_node = member.find("detaileddescription")
        if(desc_node != detail_node):
            retstr += self.get_text(detail_node)
        retstr += self.build_param_list(member.iter("param"), detail_node.iter("parameteritem"))
        return retstr
    def _build_missed_types_and_structs(self):
        fout = open(os.path.join(self.output_dir, self.missing_filename), 'w+')
        fout.write(".. contents:: " + LINE_BREAKER)
        fout.write("    :local:"  + LINE_BREAKER)
        fout.write("    :depth: 2" + LINE_BREAKER + LINE_BREAKER)
        footnote = ""
        while (len(self.missed_types_structs) > 0):
            for key, value in self.missed_types_structs.iteritems():
                fout.write(self.covert_item(value[0], key, value[1]))
                #print(value)
            self.missed_types_structs = {}
            footnote += self._build_page_ref_notes()
        fout.write(footnote)
        fout.close()
    def _build_page_ref_notes(self):
        retstr = LINE_BREAKER
        #TODO
        for key, value in self.page_references.iteritems():
            page, tag = get_page(value[0])
            m = re.search("_8h_", page)
            if(m):
                continue;
            rstname = None
            anchor = value[2].lower()
            if not page in self.references:
                self.missed_types_structs[value[0]] = (page, tag)
                rstname = os.path.splitext(self.missing_filename)[0]
            else:
                rstname = self.references[page][2]
                anchor = self.references[page][4]
            #if(tag and not self.is_github):
            #    anchor = self.anchor_prefix + "ref-" + tag
            retstr += ".. _" + key + ": " + rstname + self.page_ext + "#" + anchor
            retstr += LINE_BREAKER + LINE_BREAKER
        self.page_references = {}
        return retstr
    def _build_item_by_id(self, node, id):
        retstr = ""
        for member in node.iter("memberdef"):
            if(member.attrib["id"] != id):
                continue
            if(member.attrib["kind"] == "enum"):
                retstr += self._build_enum(member)
        return retstr
    def covert_item(self, compound, id, tag):
        xml_path = os.path.join(self.doxy_output_dir, "%s.xml" % compound)
        print("covert_item: id=%s, name=%s" % (id, xml_path))
        obj_root = etree.parse(xml_path).getroot()
        retstr = ""
        compound = obj_root.find("compounddef")
        compound_kind = compound.attrib["kind"]
        if(not tag):
            retstr += self._build_title(compound.findtext("compoundname"))
            if(compound_kind == "class"):
                retstr += self._build_class(compound)
            elif(compound_kind == "struct"):
                retstr += self._build_struct(compound)
        else:
            retstr += self._build_item_by_id(compound, id)
        return retstr
    def _build_page(self, compound):
        retstr = ""
        retstr += self.get_text(compound.find("detaileddescription"))
        return retstr
    def _build_file(self, compound, type, ref_id, name):
        retstr = ""
        for member in compound.iter("memberdef"):
            if(member.attrib["kind"] == "function" and member.attrib["id"] == ref_id):
                retstr += self.build_function(member)
        return retstr
    def convert_doxy(self, type, name):
        #print(name)
        file = ref_id = self.name_refid_map[name]
        dst_kind = type
        if(type == "function"):
            file, tag = get_page(ref_id)
            dst_kind = "file"
        xml_path = os.path.join(self.doxy_output_dir, "%s.xml" % file)
        print("convert_doxy: type=%s, name=%s" % (type, xml_path))
        obj_root = etree.parse(xml_path).getroot()
        compound = obj_root.find("compounddef")
        compound_kind = compound.attrib["kind"]
        assert(dst_kind == compound_kind)
        retstr = ""
        if(compound_kind == "class"):
            retstr += self._build_class(compound)
        elif(compound_kind == "struct"):
            retstr += self._build_struct(compound)
        elif(compound_kind == "page"):
            retstr += self._build_page(compound)
        elif(compound_kind == "group"):
            retstr += self._build_page(compound)
        elif(compound_kind == "file"):
            retstr += self._build_file(compound, type, ref_id, name)
        return retstr
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-g", "--github", action="store_true", help="Render the link in format of github wiki.")
    parser.add_argument("-e", "--ext", default="", help="extension for github wiki")
    parser.add_argument("-i", "--input", default="doxygen", help="Input file path of doxygen output and source rst file.")
    parser.add_argument("-o", "--output", default="wikipage", help="Output converted restructured text files to path.")
    parser.add_argument("-s", "--struct", default="TypesAndStructures.rest", help="Output of auto generated enum and structures.")
    parser.add_argument("-u", "--uml", action="store_true", help="Enable UML, you need to download plantuml.jar from Plantuml and put it to here. http://plantuml.sourceforge.net/")
    args = parser.parse_args()
    ext = ""
    if(len(args.ext) > 0):
        ext = ("." + args.ext)
    agent = DoxyGen2RST(args.input,
                        args.output,
                        args.struct,
                        is_github = True,
                        enable_uml = args.uml,
                        github_ext = ext)
    agent.generate()