X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=blobdiff_plain;f=src%2Ftree%2Fnaive.ml;fp=src%2Ftree%2Fnaive.ml;h=1086b1e793564a3dc9219f3fd4fe2f811321b64a;hp=0000000000000000000000000000000000000000;hb=30bc0bb1291426e5e26eb2dee1ffc41e4c246349;hpb=d9c0e4863807eaf472e875a4bad35cfefe985c95 diff --git a/src/tree/naive.ml b/src/tree/naive.ml new file mode 100644 index 0000000..1086b1e --- /dev/null +++ b/src/tree/naive.ml @@ -0,0 +1,273 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(* + Time-stamp: +*) +open Utils + +type node = { + tag : QName.t; + preorder : int; + mutable data : string; + mutable first_child : node; + mutable next_sibling : node; + mutable parent: node; +} + + + +let rec nil = { + tag = QName.nil; + preorder = -1; + data = ""; + first_child = nil; + next_sibling = nil; + parent = nil; +} + +let dummy_tag = QName.of_string "#dummy" +let rec dummy = { + tag = dummy_tag; + preorder = -1; + data = ""; + first_child = dummy; + next_sibling = dummy; + parent = dummy; +} + + +type t = { + root : node; + (* TODO add other intersting stuff *) +} + + + +module Parser = +struct + + type context = { + mutable stack : node list; + text_buffer : Buffer.t; + mutable current_preorder : int; + } + + let print_node_ptr fmt n = + Format.fprintf fmt "<%s>" + (if n == nil then "NIL" else + if n == dummy then "DUMMY" else + "NODE " ^ string_of_int n.preorder) + + let debug_node fmt node = + Format.fprintf fmt "{ tag=%s; preorder=%i; data=%s; first_child=%a; next_sibling=%a; parent=%a }" + (QName.to_string node.tag) + node.preorder + node.data + print_node_ptr node.first_child + print_node_ptr node.next_sibling + print_node_ptr node.parent + + + let debug_ctx fmt ctx = + Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n" + ctx.current_preorder + (Pretty.print_list ~sep:";\n" debug_node) ctx.stack + + + let push n ctx = ctx.stack <- n :: ctx.stack + let pop ctx = + match ctx.stack with + [] -> failwith "XML parse error" + | e :: l -> ctx.stack <- l; e + + let top ctx = match ctx.stack with + | [] -> failwith "XML parse error" + | e :: _ -> e + + let next ctx = + let i = ctx.current_preorder in + ctx.current_preorder <- 1 + i; + i + + let is_left n = n.next_sibling == dummy + + + let text_string = QName.to_string QName.text + let attr_map_string = QName.to_string QName.attribute_map + + let rec start_element_handler parser_ ctx tag attr_list = + do_text parser_ ctx; + let parent = top ctx in + let n = { tag = QName.of_string tag; + preorder = next ctx; + data = ""; + first_child = dummy; + next_sibling = dummy; + parent = parent; + } + in + if parent.first_child == dummy then parent.first_child <- n + else parent.next_sibling <- n; + push n ctx; + match attr_list with + [] -> () + | _ -> + start_element_handler parser_ ctx attr_map_string []; + List.iter (do_attribute parser_ ctx) attr_list; + end_element_handler parser_ ctx attr_map_string + + and do_attribute parser_ ctx (att, value) = + let att_tag = " " ^ att in + start_element_handler parser_ ctx att_tag []; + start_element_handler parser_ ctx text_string []; + let n = top ctx in n.data <- value; + end_element_handler parser_ ctx text_string; + end_element_handler parser_ ctx att_tag + + and consume_closing ctx n = + if n.next_sibling != dummy then + let _ = pop ctx in consume_closing ctx (top ctx) + + and end_element_handler parser_ ctx tag = + do_text parser_ ctx; + let node = top ctx in + if node.first_child == dummy then node.first_child <- nil + else begin + node.next_sibling <- nil; + consume_closing ctx node + end + + and do_text parser_ ctx = + if Buffer.length ctx.text_buffer != 0 then + let s = Buffer.contents ctx.text_buffer in + Buffer.clear ctx.text_buffer; + start_element_handler parser_ ctx text_string []; + let node = top ctx in + node.data <- s; + end_element_handler parser_ ctx text_string + + + + let character_data_handler parser_ ctx text = + Buffer.add_string ctx.text_buffer text + + let create_parser () = + let ctx = { text_buffer = Buffer.create 512; + current_preorder = 0; + stack = [] } in + let parser_ = Expat.parser_create ~encoding:None in + Expat.set_start_element_handler parser_ (start_element_handler parser_ ctx); + Expat.set_end_element_handler parser_ (end_element_handler parser_ ctx); + Expat.set_character_data_handler parser_ (character_data_handler parser_ ctx); + push { tag = QName.document; + preorder = next ctx; + data = ""; + first_child = dummy; + next_sibling = dummy; + parent = nil; + } ctx; + (parser_, + fun () -> + let node = top ctx in + node.next_sibling <- nil; + consume_closing ctx node; + match ctx.stack with + [ root ] -> + root.next_sibling <- nil; + { root = root } + | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN) + ) + + + let parse_string s = + let parser_, finalize = create_parser () in + Expat.parse parser_ s; + finalize () + + let parse_file fd = + let buffer = String.create 4096 in + let parser_, finalize = create_parser () in + let rec loop () = + let read = input fd buffer 0 4096 in + if read != 0 then + let () = Expat.parse_sub parser_ buffer 0 read in + loop () + in loop (); finalize () + +end + + +let load_xml_file = Parser.parse_file +let load_xml_string = Parser.parse_string + + +let output_escape_string out s = + for i = 0 to String.length s - 1 do + match s.[i] with + | '<' -> output_string out "<" + | '>' -> output_string out ">" + | '&' -> output_string out "&" + | '"' -> output_string out """ + | '\'' -> output_string out "'" + | c -> output_char out c + done + +let rec print_attributes out tree_ node = + if node != nil then begin + output_string out (QName.to_string node.tag); + output_string out "=\""; + output_escape_string out node.first_child.data; + output_char out '"'; + print_attributes out tree_ node.next_sibling + end + +let rec print_xml out tree_ node = + if node != nil then + let () = + if node.tag == QName.text then + output_escape_string out node.data + else + let tag = QName.to_string node.tag in + output_char out '<'; + output_string out tag; + let fchild = + if node.first_child.tag == QName.attribute_map then + let () = + print_attributes out tree_ node.first_child.first_child + in + node.first_child.next_sibling + else + node.first_child + in + if fchild == nil then output_string out "/>" + else begin + output_char out '>'; + print_xml out tree_ fchild; + output_string out "' + end + in + print_xml out tree_ node.next_sibling + + +let root t = t.root +let first_child _ n = n.first_child +let next_sibling _ n = n.next_sibling +let parent _ n = n.parent +let tag _ n = n.tag +let data _ n = n.data +let preorder _ n = n.preorder