# -*- coding: Latin-1 -*- """Graphviz's dot language parser. The dotparser parses graphviz files in dot and dot files and transforms them into a class representation defined by pydot. The module needs pyparsing (tested with version 1.2.2) and pydot Author: Michael Krause Fixes by: Ero Carrera """ __author__ = ['Michael Krause', 'Ero Carrera'] __license__ = 'MIT' import sys import glob import pydot import re import codecs from pyparsing import __version__ as pyparsing_version from pyparsing import ( nestedExpr, Literal, CaselessLiteral, Word, Upcase, OneOrMore, ZeroOrMore, Forward, NotAny, delimitedList, oneOf, Group, Optional, Combine, alphas, nums, restOfLine, cStyleComment, nums, alphanums, printables, empty, quotedString, ParseException, ParseResults, CharsNotIn, _noncomma, dblQuotedString, QuotedString, ParserElement ) class P_AttrList: def __init__(self, toks): self.attrs = {} i = 0 while i < len(toks): attrname = toks[i] if i+2 < len(toks) and toks[i+1] == '=': attrvalue = toks[i+2] i += 3 else: attrvalue = None i += 1 self.attrs[attrname] = attrvalue def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self.attrs) class DefaultStatement(P_AttrList): def __init__(self, default_type, attrs): self.default_type = default_type self.attrs = attrs def __repr__(self): return "%s(%s, %r)" % (self.__class__.__name__, self.default_type, self.attrs) top_graphs = list() def push_top_graph_stmt(str, loc, toks): attrs = {} g = None for element in toks: if( isinstance(element, (ParseResults, tuple, list)) and len(element) == 1 and isinstance(element[0], basestring) ): element = element[0] if element == 'strict': attrs['strict'] = True elif element in ['graph', 'digraph']: attrs = {} g = pydot.Dot(graph_type=element, **attrs) attrs['type'] = element top_graphs.append( g ) elif isinstance( element, basestring): g.set_name( element ) elif isinstance(element, pydot.Subgraph): g.obj_dict['attributes'].update( element.obj_dict['attributes'] ) g.obj_dict['edges'].update( element.obj_dict['edges'] ) g.obj_dict['nodes'].update( element.obj_dict['nodes'] ) g.obj_dict['subgraphs'].update( element.obj_dict['subgraphs'] ) g.set_parent_graph(g) elif isinstance(element, P_AttrList): attrs.update(element.attrs) elif isinstance(element, (ParseResults, list)): add_elements(g, element) else: raise ValueError, "Unknown element statement: %r " % element for g in top_graphs: update_parent_graph_hierarchy(g) if len( top_graphs ) == 1: return top_graphs[0] return top_graphs def update_parent_graph_hierarchy(g, parent_graph=None, level=0): if parent_graph is None: parent_graph = g for key_name in ('edges',): if isinstance(g, pydot.frozendict): item_dict = g else: item_dict = g.obj_dict if not item_dict.has_key( key_name ): continue for key, objs in item_dict[key_name].items(): for obj in objs: if 'parent_graph' in obj and obj['parent_graph'].get_parent_graph()==g: if obj['parent_graph'] is g: pass else: obj['parent_graph'].set_parent_graph(parent_graph) if key_name == 'edges' and len(key) == 2: for idx, vertex in enumerate( obj['points'] ): if isinstance( vertex, (pydot.Graph, pydot.Subgraph, pydot.Cluster)): vertex.set_parent_graph(parent_graph) if isinstance( vertex, pydot.frozendict): if vertex['parent_graph'] is g: pass else: vertex['parent_graph'].set_parent_graph(parent_graph) def add_defaults(element, defaults): d = element.__dict__ for key, value in defaults.items(): if not d.get(key): d[key] = value def add_elements(g, toks, defaults_graph=None, defaults_node=None, defaults_edge=None): if defaults_graph is None: defaults_graph = {} if defaults_node is None: defaults_node = {} if defaults_edge is None: defaults_edge = {} for elm_idx, element in enumerate(toks): if isinstance(element, (pydot.Subgraph, pydot.Cluster)): add_defaults(element, defaults_graph) g.add_subgraph(element) elif isinstance(element, pydot.Node): add_defaults(element, defaults_node) g.add_node(element) elif isinstance(element, pydot.Edge): add_defaults(element, defaults_edge) g.add_edge(element) elif isinstance(element, ParseResults): for e in element: add_elements(g, [e], defaults_graph, defaults_node, defaults_edge) elif isinstance(element, DefaultStatement): if element.default_type == 'graph': default_graph_attrs = pydot.Node('graph', **element.attrs) g.add_node(default_graph_attrs) elif element.default_type == 'node': default_node_attrs = pydot.Node('node', **element.attrs) g.add_node(default_node_attrs) elif element.default_type == 'edge': default_edge_attrs = pydot.Node('edge', **element.attrs) g.add_node(default_edge_attrs) defaults_edge.update(element.attrs) else: raise ValueError, "Unknown DefaultStatement: %s " % element.default_type elif isinstance(element, P_AttrList): g.obj_dict['attributes'].update(element.attrs) else: raise ValueError, "Unknown element statement: %r" % element def push_graph_stmt(str, loc, toks): g = pydot.Subgraph('') add_elements(g, toks) return g def push_subgraph_stmt(str, loc, toks): g = pydot.Subgraph('') for e in toks: if len(e)==3: e[2].set_name(e[1]) if e[0] == 'subgraph': e[2].obj_dict['show_keyword'] = True return e[2] else: if e[0] == 'subgraph': e[1].obj_dict['show_keyword'] = True return e[1] return g def push_default_stmt(str, loc, toks): # The pydot class instances should be marked as # default statements to be inherited by actual # graphs, nodes and edges. # default_type = toks[0][0] if len(toks) > 1: attrs = toks[1].attrs else: attrs = {} if default_type in ['graph', 'node', 'edge']: return DefaultStatement(default_type, attrs) else: raise ValueError, "Unknown default statement: %r " % toks def push_attr_list(str, loc, toks): p = P_AttrList(toks) return p def get_port(node): if len(node)>1: if isinstance(node[1], ParseResults): if len(node[1][0])==2: if node[1][0][0]==':': return node[1][0][1] return None def do_node_ports(node): node_port = '' if len(node) > 1: node_port = ''.join( [str(a)+str(b) for a,b in node[1] ] ) return node_port def push_edge_stmt(str, loc, toks): tok_attrs = [a for a in toks if isinstance(a, P_AttrList)] attrs = {} for a in tok_attrs: attrs.update(a.attrs) e = [] if isinstance(toks[0][0], pydot.Graph): n_prev = pydot.frozendict(toks[0][0].obj_dict) else: n_prev = toks[0][0] + do_node_ports( toks[0] ) if isinstance(toks[2][0], ParseResults): n_next_list = [[n.get_name(),] for n in toks[2][0] ] for n_next in [n for n in n_next_list]: n_next_port = do_node_ports(n_next) e.append(pydot.Edge(n_prev, n_next[0]+n_next_port, **attrs)) elif isinstance(toks[2][0], pydot.Graph): e.append(pydot.Edge(n_prev, pydot.frozendict(toks[2][0].obj_dict), **attrs)) elif isinstance(toks[2][0], pydot.Node): node = toks[2][0] if node.get_port() is not None: name_port = node.get_name() + ":" + node.get_port() else: name_port = node.get_name() e.append(pydot.Edge(n_prev, name_port, **attrs)) elif isinstance(toks[2][0], type('')): for n_next in [n for n in tuple(toks)[2::2]]: if isinstance(n_next, P_AttrList) or not isinstance(n_next[0], type('')): continue n_next_port = do_node_ports( n_next ) e.append(pydot.Edge(n_prev, n_next[0]+n_next_port, **attrs)) n_prev = n_next[0]+n_next_port else: # UNEXPECTED EDGE TYPE pass return e def push_node_stmt(s, loc, toks): if len(toks) == 2: attrs = toks[1].attrs else: attrs = {} node_name = toks[0] if isinstance(node_name, list) or isinstance(node_name, tuple): if len(node_name)>0: node_name = node_name[0] n = pydot.Node(str(node_name), **attrs) return n graphparser = None def graph_definition(): global graphparser if not graphparser: # punctuation colon = Literal(":") lbrace = Literal("{") rbrace = Literal("}") lbrack = Literal("[") rbrack = Literal("]") lparen = Literal("(") rparen = Literal(")") equals = Literal("=") comma = Literal(",") dot = Literal(".") slash = Literal("/") bslash = Literal("\\") star = Literal("*") semi = Literal(";") at = Literal("@") minus = Literal("-") # keywords strict_ = CaselessLiteral("strict") graph_ = CaselessLiteral("graph") digraph_ = CaselessLiteral("digraph") subgraph_ = CaselessLiteral("subgraph") node_ = CaselessLiteral("node") edge_ = CaselessLiteral("edge") # token definitions identifier = Word(alphanums + "_." ).setName("identifier") double_quoted_string = QuotedString('"', multiline=True, unquoteResults=False) # dblQuotedString alphastring_ = OneOrMore(CharsNotIn(_noncomma + ' ')) def parse_html(s, loc, toks): return '<%s>' % ''.join(toks[0]) opener = '<' closer = '>' html_text = nestedExpr( opener, closer, ( CharsNotIn( opener + closer ) ) ).setParseAction(parse_html).leaveWhitespace() ID = ( identifier | html_text | double_quoted_string | #.setParseAction(strip_quotes) | alphastring_ ).setName("ID") float_number = Combine(Optional(minus) + OneOrMore(Word(nums + "."))).setName("float_number") righthand_id = (float_number | ID ).setName("righthand_id") port_angle = (at + ID).setName("port_angle") port_location = (OneOrMore(Group(colon + ID)) | Group(colon + lparen + ID + comma + ID + rparen)).setName("port_location") port = (Group(port_location + Optional(port_angle)) | Group(port_angle + Optional(port_location))).setName("port") node_id = (ID + Optional(port)) a_list = OneOrMore(ID + Optional(equals + righthand_id) + Optional(comma.suppress())).setName("a_list") attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) + rbrack.suppress()).setName("attr_list") attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName("attr_stmt") edgeop = (Literal("--") | Literal("->")).setName("edgeop") stmt_list = Forward() graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) + rbrace.suppress() + Optional(semi.suppress()) ).setName("graph_stmt") edge_point = Forward() edgeRHS = OneOrMore(edgeop + edge_point) edge_stmt = edge_point + edgeRHS + Optional(attr_list) subgraph = Group(subgraph_ + Optional(ID) + graph_stmt).setName("subgraph") edge_point << Group( subgraph | graph_stmt | node_id ).setName('edge_point') node_stmt = (node_id + Optional(attr_list) + Optional(semi.suppress())).setName("node_stmt") assignment = (ID + equals + righthand_id).setName("assignment") stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt | node_stmt).setName("stmt") stmt_list << OneOrMore(stmt + Optional(semi.suppress())) graphparser = OneOrMore( (Optional(strict_) + Group((graph_ | digraph_)) + Optional(ID) + graph_stmt).setResultsName("graph") ) singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine) # actions graphparser.ignore(singleLineComment) graphparser.ignore(cStyleComment) assignment.setParseAction(push_attr_list) a_list.setParseAction(push_attr_list) edge_stmt.setParseAction(push_edge_stmt) node_stmt.setParseAction(push_node_stmt) attr_stmt.setParseAction(push_default_stmt) subgraph.setParseAction(push_subgraph_stmt) graph_stmt.setParseAction(push_graph_stmt) graphparser.setParseAction(push_top_graph_stmt) return graphparser def parse_dot_data(data): global top_graphs top_graphs = list() if data.startswith(codecs.BOM_UTF8): data = data.decode( 'utf-8' ) try: graphparser = graph_definition() if pyparsing_version >= '1.2': graphparser.parseWithTabs() tokens = graphparser.parseString(data) if len(tokens) == 1: return tokens[0] else: return [g for g in tokens] except ParseException, err: print err.line print " "*(err.column-1) + "^" print err return None