X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=blobdiff_plain;f=src%2Fnaive_tree.ml;fp=src%2Fnaive_tree.ml;h=b7c0be641833b117a587a67f2364768eee5137d4;hp=0000000000000000000000000000000000000000;hb=b00bff88c7902e828804c06b7f9dc55222fdc84e;hpb=03b6a364e7240ca827585e7baff225a0aaa33bc6 diff --git a/src/naive_tree.ml b/src/naive_tree.ml new file mode 100644 index 0000000..b7c0be6 --- /dev/null +++ b/src/naive_tree.ml @@ -0,0 +1,313 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(* + Time-stamp: +*) + +type node = { + tag : QName.t; + preorder : int; + mutable kind : Tree.NodeKind.t; + mutable data : string; + mutable first_child : node; + mutable next_sibling : node; + mutable parent: node; +} + + + +let rec nil = { + tag = QName.nil; + kind = Tree.NodeKind.Element; + preorder = -1; + data = ""; + first_child = nil; + next_sibling = nil; + parent = nil; +} + +let dummy_tag = QName.of_string "#dummy" +let rec dummy = { + tag = dummy_tag; + kind = Tree.NodeKind.Element; + preorder = -1; + data = ""; + first_child = dummy; + next_sibling = dummy; + parent = dummy; +} + + +type t = { + root : node; + size : int; + (* TODO add other intersting stuff *) +} + + + +module Parser = +struct + + type context = { + mutable stack : node list; + text_buffer : Buffer.t; + mutable current_preorder : int; + } + + let print_node_ptr fmt n = + Format.fprintf fmt "<%s>" + (if n == nil then "NIL" else + if n == dummy then "DUMMY" else + "NODE " ^ string_of_int n.preorder) + + let debug_node fmt node = + Format.fprintf fmt "{ tag=%s; preorder=%i; data=%S; first_child=%a; next_sibling=%a; parent=%a }" + (QName.to_string node.tag) + node.preorder + node.data + print_node_ptr node.first_child + print_node_ptr node.next_sibling + print_node_ptr node.parent + + + let debug_ctx fmt ctx = + Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n" + ctx.current_preorder + (Pretty.print_list ~sep:";\n" debug_node) ctx.stack + + + let push n ctx = ctx.stack <- n :: ctx.stack + let pop ctx = + match ctx.stack with + [] -> failwith "XML parse error" + | e :: l -> ctx.stack <- l; e + + let top ctx = match ctx.stack with + | [] -> failwith "XML parse error" + | e :: _ -> e + + let next ctx = + let i = ctx.current_preorder in + ctx.current_preorder <- 1 + i; + i + + let is_left n = n.next_sibling == dummy + + + let text_string = QName.to_string QName.text + let comment_string = QName.to_string QName.comment + + + let rec start_element_handler parser_ ctx tag attr_list = + do_text parser_ ctx; + let parent = top ctx in + let n = { tag = QName.of_string tag; + kind = Tree.NodeKind.Element; + preorder = next ctx; + data = ""; + first_child = dummy; + next_sibling = dummy; + parent = parent; + } + in + if parent.first_child == dummy then parent.first_child <- n + else parent.next_sibling <- n; + push n ctx; + List.iter (do_attribute parser_ ctx) attr_list + + and do_attribute parser_ ctx (att, value) = + let att_tag = QName.to_string (QName.attribute (QName.of_string att)) in + start_element_handler parser_ ctx att_tag []; + let n = top ctx in + n.data <- value; + n.kind <- Tree.NodeKind.Attribute; + end_element_handler parser_ ctx att_tag + + and consume_closing ctx n = + if n.next_sibling != dummy then + let _ = pop ctx in consume_closing ctx (top ctx) + + and end_element_handler parser_ ctx _ = + do_text parser_ ctx; + let node = top ctx in + if node.first_child == dummy then node.first_child <- nil + else begin + node.next_sibling <- nil; + consume_closing ctx node + end + + and do_text parser_ ctx = + if Buffer.length ctx.text_buffer != 0 then + let s = Buffer.contents ctx.text_buffer in + Buffer.clear ctx.text_buffer; + start_element_handler parser_ ctx text_string []; + let node = top ctx in + node.data <- s; + node.kind <- Tree.NodeKind.Text; + end_element_handler parser_ ctx text_string + + and comment_handler parser_ ctx s = + do_text parser_ ctx; + start_element_handler parser_ ctx comment_string []; + let node = top ctx in + node.data <- s; + node.kind <- Tree.NodeKind.Comment; + end_element_handler parser_ ctx comment_string + + and processing_instruction_handler parser_ ctx tag data = + do_text parser_ ctx; + let pi = QName.to_string + (QName.processing_instruction (QName.of_string tag)) + in + start_element_handler parser_ ctx pi []; + let node = top ctx in + node.data <- data; + node.kind <- Tree.NodeKind.ProcessingInstruction; + end_element_handler parser_ ctx pi + + + let character_data_handler _parser ctx text = + Buffer.add_string ctx.text_buffer text + + let create_parser () = + let ctx = { text_buffer = Buffer.create 512; + current_preorder = 0; + stack = [] } in + let psr = Expat.parser_create ~encoding:None in + Expat.set_start_element_handler psr (start_element_handler psr ctx); + Expat.set_end_element_handler psr (end_element_handler psr ctx); + Expat.set_character_data_handler + psr (character_data_handler psr ctx); + Expat.set_comment_handler psr (comment_handler psr ctx); + Expat.set_processing_instruction_handler psr + (processing_instruction_handler psr ctx); + push { tag = QName.document; + preorder = next ctx; + kind = Tree.NodeKind.Document; + data = ""; + first_child = dummy; + next_sibling = dummy; + parent = nil; + } ctx; + (psr, + fun () -> + let node = top ctx in + node.next_sibling <- nil; + consume_closing ctx node; + match ctx.stack with + [ root ] -> + root.next_sibling <- nil; + { root = root; + size = ctx.current_preorder + } + | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN) + ) + + + let parse_string s = + let parser_, finalize = create_parser () in + Expat.parse parser_ s; + finalize () + + let parse_file fd = + let buffer = String.create 4096 in + let parser_, finalize = create_parser () in + let rec loop () = + let read = input fd buffer 0 4096 in + if read != 0 then + let () = Expat.parse_sub parser_ buffer 0 read in + loop () + in loop (); finalize () + +end + + +let load_xml_file = Parser.parse_file +let load_xml_string = Parser.parse_string + +let output_escape_string out s = + for i = 0 to String.length s - 1 do + match s.[i] with + | '<' -> output_string out "<" + | '>' -> output_string out ">" + | '&' -> output_string out "&" + | '"' -> output_string out """ + | '\'' -> output_string out "'" + | c -> output_char out c + done + + +let rec print_attributes ?(sep=true) out tree_ node = + if (node.kind == Tree.NodeKind.Attribute) then + let tag = QName.to_string (QName.remove_prefix node.tag) in + if sep then output_char out ' '; + output_string out tag; + output_string out "=\""; + output_escape_string out node.data; + output_char out '\"'; + print_attributes out tree_ node.next_sibling + else + node + +let rec print_xml out tree_ node = + if node != nil then + let () = + let open Tree.NodeKind in + match node.kind with + | Node -> () + | Text -> output_escape_string out node.data + | Element | Document -> + let tag = QName.to_string node.tag in + output_char out '<'; + output_string out tag; + let fchild = print_attributes out tree_ node.first_child in + if fchild == nil then output_string out "/>" + else begin + output_char out '>'; + print_xml out tree_ fchild; + output_string out "' + end + | Attribute -> ignore (print_attributes ~sep:false out tree_ node) + | Comment -> + output_string out "" + | ProcessingInstruction -> + output_string out "" + in + print_xml out tree_ node.next_sibling + +let print_xml out tree_ node = + let nnode = { node with next_sibling = nil } in print_xml out tree_ nnode + +let root t = t.root +let size t = t.size +let first_child _ n = n.first_child +let next_sibling _ n = n.next_sibling +let parent _ n = n.parent +let tag _ n = n.tag +let data _ n = n.data +let kind _ n = n.kind +let preorder _ n = n.preorder + +let print_node fmt n = Parser.debug_node fmt n