X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=blobdiff_plain;f=src%2Ftree.ml;h=97711dd60f6e0662c306c1ecc306e8eb8e0c1831;hp=f914d332a9badf518eac73496d74f22574b0072f;hb=aade6d9ba2e2b65e021de8a1c3a2d3874aa5742e;hpb=7e9bcacfcf765bf20e19f6277da8471bcf395d26 diff --git a/src/tree.ml b/src/tree.ml index f914d33..97711dd 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -13,212 +13,103 @@ (* *) (***********************************************************************) -type node = { - tag : QName.t; - preorder : int; - mutable data : string; - mutable first_child : node; - mutable next_sibling : node; - mutable parent: node; -} - - - -let rec nil = { - tag = QName.nil; - preorder = -1; - data = ""; - first_child = nil; - next_sibling = nil; - parent = nil; -} - -let dummy_tag = QName.of_string "#dummy" -let rec dummy = { - tag = dummy_tag; - preorder = -1; - data = ""; - first_child = dummy; - next_sibling = dummy; - parent = dummy; -} - - -type t = { - root : node; - (* TODO add other intersting stuff *) -} - - - -module Parser = -struct - - type context = { - mutable stack : node list; - text_buffer : Buffer.t; - mutable current_preorder : int; - } - - let print_node_ptr fmt n = - Format.fprintf fmt "%s" - (if n == nil then "" else - if n == dummy then "" else - "") - - let debug_node fmt node = - Format.fprintf fmt "{ tag=%s; preorder=%i; data=%s; first_child=%a; next_sibling=%a; parent=%a }" - (QName.to_string node.tag) - node.preorder - node.data - print_node_ptr node.first_child - print_node_ptr node.next_sibling - print_node_ptr node.parent - - - let debug_ctx fmt ctx = - Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n" - ctx.current_preorder - (Pretty.print_list ~sep:";\n" debug_node) ctx.stack - - - let push n ctx = ctx.stack <- n :: ctx.stack - let pop ctx = - match ctx.stack with - [] -> failwith "XML parse error" - | e :: l -> ctx.stack <- l; e - - let top ctx = match ctx.stack with - | [] -> failwith "XML parse error" - | e :: _ -> e - - let next ctx = - let i = ctx.current_preorder in - ctx.current_preorder <- 1 + i; - i - - let is_left n = n.next_sibling == dummy - - - let text_string = QName.to_string QName.text - - let rec start_element_handler parser_ ctx tag attr_list = - do_text parser_ ctx; - let parent = top ctx in - let n = { tag = QName.of_string tag; - preorder = next ctx; - data = ""; - first_child = dummy; - next_sibling = dummy; - parent = parent; - } - in - if parent.first_child == dummy then parent.first_child <- n - else parent.next_sibling <- n; - push n ctx - - and consume_closing ctx n = - if n.next_sibling != dummy then - let _ = pop ctx in consume_closing ctx (top ctx) - - and end_element_handler parser_ ctx tag = - do_text parser_ ctx; - let node = top ctx in - if nodef.first_child == dummy then node.first_child <- nil - else begin - node.next_sibling <- nil; - consume_closing ctx node - end - - and do_text parser_ ctx = - if Buffer.length ctx.text_buffer != 0 then - let s = Buffer.contents ctx.text_buffer in - Buffer.clear ctx.text_buffer; - start_element_handler parser_ ctx text_string []; - let node = top ctx in - node.data <- s; - end_element_handler parser_ ctx text_string; - Format.eprintf "DEBUG: %a\n\n" debug_ctx ctx - - - let character_data_handler parser_ ctx text = - Buffer.add_string ctx.text_buffer text - - let create_parser () = - let ctx = { text_buffer = Buffer.create 512; - current_preorder = 0; - stack = [] } in - let parser_ = Expat.parser_create ~encoding:None in - Expat.set_start_element_handler parser_ (start_element_handler parser_ ctx); - Expat.set_end_element_handler parser_ (end_element_handler parser_ ctx); - Expat.set_character_data_handler parser_ (character_data_handler parser_ ctx); - push { tag = QName.document; - preorder = next ctx; - data = ""; - first_child = dummy; - next_sibling = dummy; - parent = nil; - } ctx; - (parser_, - fun () -> - let node = top ctx in - node.next_sibling <- nil; - consume_closing ctx node; - match ctx.stack with - [ root ] -> - root.next_sibling <- nil; - { root = root } - | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN) - ) - - - let parse_string s = - let parser_, finalize = create_parser () in - Expat.parse parser_ s; - finalize () - - let parse_file fd = - let buffer = String.create 4096 in - let parser_, finalize = create_parser () in - let rec loop () = - let read = input fd buffer 0 4096 in - if read != 0 then - let () = Expat.parse_sub parser_ buffer 0 read in - loop () - in loop (); finalize () +(** The different kind of XML nodes and utility functions *) +module NodeKind = + struct + type t = + Document | Element | Text | Comment | Attribute | ProcessingInstruction | Node + + let to_string = + function + Document -> "document" + | Element -> "element" + | Attribute -> "attribute" + | Text -> "text" + | Comment -> "comment" + | ProcessingInstruction -> "processing-instruction" + | Node -> "node" + + let print ppf k = Format.fprintf ppf "%s" (to_string k) + + + let is_a k1 k2 = + k1 == Node || k2 == Node || k1 == k2 end +(** Signatures for trees *) + +exception Parse_error of string + +module type S = +sig + type node + (** The type of a tree node *) + + type t + (** The type of trees *) + + val size : t -> int + (** Return the number of nodes *) + + val nil : node + (** Nil node, denoting the first/second child of a leaf or the parent of + the root *) + + val dummy : node + (** Dummy node that is guaranteed to never occur in any tree *) + + val load_xml_file : in_channel -> t + (** Takes a file descriptor and returns the XML data stored in the + corresponding file. Start at the current position in the file + descriptor (which is not necessarily the begining of file) + *) -let load_xml_file = Parser.parse_file -let load_xml_string = Parser.parse_string - -let rec print_xml out tree_ node = - if node != nil then - let () = - if node.tag == QName.text then - output_string out node.data - else - let tag = QName.to_string node.tag in - output_char out '<'; - output_string out tag; - (* print attributes *) - if node.first_child == nil then output_string out "/>" - else begin - output_char out '>'; - print_xml out tree_ node.first_child; - output_string out "' - end - in - print_xml out tree_ node.next_sibling - - -let root t = t.root -let first_child _ n = n.first_child -let next_sibling _ n = n.next_sibling -let parent _ n = n.parent -let tag _ n = n.tag -let data _ n = n.data -let preorder _ n = n.preorder + val load_xml_string : string -> t + (** Loads XML data stored in a string *) + + val print_xml : out_channel -> t -> node -> unit + (** Outputs the tree as an XML document on the given output_channel *) + + val root : t -> node + (** Returns the root of the tree *) + + val first_child : t -> node -> node + (** [first_child t n] returns the first child of node [n] in tree [t]. + Returns [nil] if [n] is a leaf. Returns [nil] if [n == nil]. + *) + + val next_sibling : t -> node -> node + (** [next_sibling t n] returns the next_sibling of node [n] in tree [t]. + Returns [nil] if [n] is the last child of a node. + Returns [nil] if [n == nil]. + *) + + val parent : t -> node -> node + (** [next_sibling t n] returns the parent of node [n] in tree [t]. + Returns [nil] if [n] is the root of the tree. + Returns [nil] if [n == nil]. + *) + + val tag : t -> node -> QName.t + (** Returns the label of a given node *) + + val data : t -> node -> string + (** Returns the character data associated with a node. + The only node having character data are those whose label is + QName.text, QName.cdata_section or QName.comment + *) + + val kind : t -> node -> NodeKind.t + (** Returns the kind of the given node *) + + val preorder : t -> node -> int + (** [preorder t n] returns the pre-order position of [n] in [t]. + [preodrder t (root t) == 0] and [preorder t nil < 0]. + *) + + val by_preorder : t -> int -> node + (** [by_preorder t i] returns the node with preorder [i] + *) + val print_node : Format.formatter -> node -> unit +end