X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=blobdiff_plain;f=src%2Ftree.ml;h=97711dd60f6e0662c306c1ecc306e8eb8e0c1831;hp=71f9a98b95180096a453254b3d5e564761f1a453;hb=aade6d9ba2e2b65e021de8a1c3a2d3874aa5742e;hpb=73755ec720254766e4504ac72684be5e357b6939 diff --git a/src/tree.ml b/src/tree.ml index 71f9a98..97711dd 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -13,260 +13,103 @@ (* *) (***********************************************************************) -(* - Time-stamp: -*) +(** The different kind of XML nodes and utility functions *) -type node = { - tag : QName.t; - preorder : int; - mutable data : string; - mutable first_child : node; - mutable next_sibling : node; - mutable parent: node; -} +module NodeKind = + struct + type t = + Document | Element | Text | Comment | Attribute | ProcessingInstruction | Node + let to_string = + function + Document -> "document" + | Element -> "element" + | Attribute -> "attribute" + | Text -> "text" + | Comment -> "comment" + | ProcessingInstruction -> "processing-instruction" + | Node -> "node" + let print ppf k = Format.fprintf ppf "%s" (to_string k) -let rec nil = { - tag = QName.nil; - preorder = -1; - data = ""; - first_child = nil; - next_sibling = nil; - parent = nil; -} - -let dummy_tag = QName.of_string "#dummy" -let rec dummy = { - tag = dummy_tag; - preorder = -1; - data = ""; - first_child = dummy; - next_sibling = dummy; - parent = dummy; -} - - -type t = { - root : node; - (* TODO add other intersting stuff *) -} - - - -module Parser = -struct - - type context = { - mutable stack : node list; - text_buffer : Buffer.t; - mutable current_preorder : int; - } - - let print_node_ptr fmt n = - Format.fprintf fmt "<%s>" - (if n == nil then "NIL" else - if n == dummy then "DUMMY" else - "NODE " ^ string_of_int n.preorder) - - let debug_node fmt node = - Format.fprintf fmt "{ tag=%s; preorder=%i; data=%s; first_child=%a; next_sibling=%a; parent=%a }" - (QName.to_string node.tag) - node.preorder - node.data - print_node_ptr node.first_child - print_node_ptr node.next_sibling - print_node_ptr node.parent - - - let debug_ctx fmt ctx = - Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n" - ctx.current_preorder - (Pretty.print_list ~sep:";\n" debug_node) ctx.stack + let is_a k1 k2 = + k1 == Node || k2 == Node || k1 == k2 +end - let push n ctx = ctx.stack <- n :: ctx.stack - let pop ctx = - match ctx.stack with - [] -> failwith "XML parse error" - | e :: l -> ctx.stack <- l; e +(** Signatures for trees *) - let top ctx = match ctx.stack with - | [] -> failwith "XML parse error" - | e :: _ -> e +exception Parse_error of string - let next ctx = - let i = ctx.current_preorder in - ctx.current_preorder <- 1 + i; - i +module type S = +sig + type node + (** The type of a tree node *) - let is_left n = n.next_sibling == dummy + type t + (** The type of trees *) + val size : t -> int + (** Return the number of nodes *) - let text_string = QName.to_string QName.text - let attr_map_string = QName.to_string QName.attribute_map + val nil : node + (** Nil node, denoting the first/second child of a leaf or the parent of + the root *) - let rec start_element_handler parser_ ctx tag attr_list = - do_text parser_ ctx; - let parent = top ctx in - let n = { tag = QName.of_string tag; - preorder = next ctx; - data = ""; - first_child = dummy; - next_sibling = dummy; - parent = parent; - } - in - if parent.first_child == dummy then parent.first_child <- n - else parent.next_sibling <- n; - push n ctx; - match attr_list with - [] -> () - | _ -> - start_element_handler parser_ ctx attr_map_string []; - List.iter (do_attribute parser_ ctx) attr_list; - end_element_handler parser_ ctx attr_map_string + val dummy : node + (** Dummy node that is guaranteed to never occur in any tree *) - and do_attribute parser_ ctx (att, value) = - let att_tag = " " ^ att in - start_element_handler parser_ ctx att_tag []; - start_element_handler parser_ ctx text_string []; - let n = top ctx in n.data <- value; - end_element_handler parser_ ctx text_string; - end_element_handler parser_ ctx att_tag + val load_xml_file : in_channel -> t + (** Takes a file descriptor and returns the XML data stored in the + corresponding file. Start at the current position in the file + descriptor (which is not necessarily the begining of file) + *) - and consume_closing ctx n = - if n.next_sibling != dummy then - let _ = pop ctx in consume_closing ctx (top ctx) + val load_xml_string : string -> t + (** Loads XML data stored in a string *) - and end_element_handler parser_ ctx tag = - do_text parser_ ctx; - let node = top ctx in - if node.first_child == dummy then node.first_child <- nil - else begin - node.next_sibling <- nil; - consume_closing ctx node - end + val print_xml : out_channel -> t -> node -> unit + (** Outputs the tree as an XML document on the given output_channel *) - and do_text parser_ ctx = - if Buffer.length ctx.text_buffer != 0 then - let s = Buffer.contents ctx.text_buffer in - Buffer.clear ctx.text_buffer; - start_element_handler parser_ ctx text_string []; - let node = top ctx in - node.data <- s; - end_element_handler parser_ ctx text_string + val root : t -> node + (** Returns the root of the tree *) + val first_child : t -> node -> node + (** [first_child t n] returns the first child of node [n] in tree [t]. + Returns [nil] if [n] is a leaf. Returns [nil] if [n == nil]. + *) + val next_sibling : t -> node -> node + (** [next_sibling t n] returns the next_sibling of node [n] in tree [t]. + Returns [nil] if [n] is the last child of a node. + Returns [nil] if [n == nil]. + *) - let character_data_handler parser_ ctx text = - Buffer.add_string ctx.text_buffer text + val parent : t -> node -> node + (** [next_sibling t n] returns the parent of node [n] in tree [t]. + Returns [nil] if [n] is the root of the tree. + Returns [nil] if [n == nil]. + *) - let create_parser () = - let ctx = { text_buffer = Buffer.create 512; - current_preorder = 0; - stack = [] } in - let parser_ = Expat.parser_create ~encoding:None in - Expat.set_start_element_handler parser_ (start_element_handler parser_ ctx); - Expat.set_end_element_handler parser_ (end_element_handler parser_ ctx); - Expat.set_character_data_handler parser_ (character_data_handler parser_ ctx); - push { tag = QName.document; - preorder = next ctx; - data = ""; - first_child = dummy; - next_sibling = dummy; - parent = nil; - } ctx; - (parser_, - fun () -> - let node = top ctx in - node.next_sibling <- nil; - consume_closing ctx node; - match ctx.stack with - [ root ] -> - root.next_sibling <- nil; - { root = root } - | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN) - ) + val tag : t -> node -> QName.t + (** Returns the label of a given node *) + val data : t -> node -> string + (** Returns the character data associated with a node. + The only node having character data are those whose label is + QName.text, QName.cdata_section or QName.comment + *) - let parse_string s = - let parser_, finalize = create_parser () in - Expat.parse parser_ s; - finalize () + val kind : t -> node -> NodeKind.t + (** Returns the kind of the given node *) - let parse_file fd = - let buffer = String.create 4096 in - let parser_, finalize = create_parser () in - let rec loop () = - let read = input fd buffer 0 4096 in - if read != 0 then - let () = Expat.parse_sub parser_ buffer 0 read in - loop () - in loop (); finalize () + val preorder : t -> node -> int + (** [preorder t n] returns the pre-order position of [n] in [t]. + [preodrder t (root t) == 0] and [preorder t nil < 0]. + *) + val by_preorder : t -> int -> node + (** [by_preorder t i] returns the node with preorder [i] + *) + val print_node : Format.formatter -> node -> unit end - - -let load_xml_file = Parser.parse_file -let load_xml_string = Parser.parse_string - - -let output_escape_string out s = - for i = 0 to String.length s - 1 do - match s.[i] with - | '<' -> output_string out "<" - | '>' -> output_string out ">" - | '&' -> output_string out "&" - | '"' -> output_string out """ - | '\'' -> output_string out "'" - | c -> output_char out c - done - -let rec print_attributes out tree_ node = - if node != nil then begin - output_string out (QName.to_string node.tag); - output_string out "=\""; - output_escape_string out node.first_child.data; - output_char out '"'; - print_attributes out tree_ node.next_sibling - end - -let rec print_xml out tree_ node = - if node != nil then - let () = - if node.tag == QName.text then - output_escape_string out node.data - else - let tag = QName.to_string node.tag in - output_char out '<'; - output_string out tag; - let fchild = - if node.first_child.tag == QName.attribute_map then - let () = - print_attributes out tree_ node.first_child.first_child - in - node.first_child.next_sibling - else - node.first_child - in - if fchild == nil then output_string out "/>" - else begin - output_char out '>'; - print_xml out tree_ fchild; - output_string out "' - end - in - print_xml out tree_ node.next_sibling - - -let root t = t.root -let first_child _ n = n.first_child -let next_sibling _ n = n.next_sibling -let parent _ n = n.parent -let tag _ n = n.tag -let data _ n = n.data -let preorder _ n = n.preorder