From: Kim Nguyễn Date: Wed, 12 Apr 2017 09:30:11 +0000 (+0200) Subject: Add a compact tree model. X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=commitdiff_plain;h=f41ff8d936d971eb0712e458826f6555b83746da Add a compact tree model. --- diff --git a/src/compact_node_list.ml b/src/compact_node_list.ml new file mode 100644 index 0000000..d1a74ff --- /dev/null +++ b/src/compact_node_list.ml @@ -0,0 +1,16 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2017 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +include Deque.Make (struct type t = Compact_tree.node end) diff --git a/src/compact_tree.ml b/src/compact_tree.ml new file mode 100644 index 0000000..8329499 --- /dev/null +++ b/src/compact_tree.ml @@ -0,0 +1,171 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2016 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +type node = int + +let dummy_tag = QName.of_string "#dummy" + (* +open Bigarray +*) + +(* type table = (int, int16_unsigned_elt, c_layout) Array1.t *) +type table = int array +type t = { + table : table; + kind : Bytes.t; + tags : QName.t array; + data : string array; +} + + +(* encoding : + i -> kind | QNameId lsl 8 + i + 1 -> fc + i + 2 -> ns + i + 3 -> p +*) + +let next i = i + 3 +let idx i = i / 3 + +let dummy = 0 +let nil = next (dummy) +let root _t = next (next dummy) + +let size t = (idx (Array.length t.table)) - 2 + +let first_child t n = + Array.unsafe_get t.table (n + 0) + +let next_sibling t n = + Array.unsafe_get t.table (n + 1) + +let parent t n = + Array.unsafe_get t.table (n + 2) + +let tag t n = + Array.unsafe_get t.tags (idx n) + +let kind t n : Tree.NodeKind.t = + Obj.magic (String.unsafe_get t.kind (idx n)) + +let preorder t n = + (idx n) - 2 + +let data t n = Array.unsafe_get t.data (idx n) +let by_preorder _ i = (i+2) / 3 + + +let output_escape_string out s = + for i = 0 to String.length s - 1 do + match s.[i] with + | '<' -> output_string out "<" + | '>' -> output_string out ">" + | '&' -> output_string out "&" + | '"' -> output_string out """ + | '\'' -> output_string out "'" + | c -> output_char out c + done + + +let rec print_attributes ?(sep=true) stop out tree node = + if (kind tree node == Tree.NodeKind.Attribute) && node != stop then + let tag = QName.to_string (tag tree node) in + if sep then output_char out ' '; + output_string out tag; + output_string out "=\""; + output_escape_string out (data tree node); + output_char out '\"'; + print_attributes stop out tree (next_sibling tree node) + else + node + +let rec print_xml stop out tree node = + if node != nil && node != stop then + let () = + let open Tree.NodeKind in + match kind tree node with + | Node -> () + | Text -> output_escape_string out (data tree node) + | Element | Document -> + let tag = QName.to_string (tag tree node)in + output_char out '<'; + output_string out tag; + let fchild = print_attributes stop out tree (first_child tree node) in + if fchild == nil then output_string out "/>" + else begin + output_char out '>'; + print_xml stop out tree fchild; + output_string out "' + end + | Attribute -> ignore (print_attributes stop ~sep:false out tree node) + | Comment -> + output_string out "" + | ProcessingInstruction -> + output_string out "" + in + print_xml stop out tree (next_sibling tree node) + +let print_xml out tree node = + print_xml (next_sibling tree node) out tree node + + +let mk_node table_a data_a kind_a tags_a i kind tag data pre_fc pre_ns pre_p = + try + data_a.(i+2) <- data; + tags_a.(i+2) <- tag; + Bytes.unsafe_set kind_a (i+2) (Obj.magic kind); + let n = (i+2) * 3 in + let fc = (pre_fc + 2) * 3 in + let ns = (pre_ns + 2) * 3 in + let p = (pre_p + 2) * 3 in + table_a.(n) <- fc; + table_a.(n + 1) <- ns; + table_a.(n + 2) <- p + with _ -> assert false + + +let of_naive t = + let s = Naive_tree.size t in + let len = (s + 2) * 3 in + let table = Array.make len ~-1 (* int16_unsigned c_layout len *) in + let data = Array.make (s + 2) "" in + let tags = Array.make (s+2) dummy_tag in + let kind = Bytes.make (s+2) '\000' in + mk_node table data kind tags ~-2 Tree.NodeKind.Element dummy_tag "" dummy dummy dummy; + mk_node table data kind tags ~-1 Tree.NodeKind.Element QName.nil "" nil nil nil; + for i = 0 to s - 1 do + let node = Naive_tree.by_preorder t i in + mk_node table data kind tags i (Naive_tree.kind t node) + (Naive_tree.tag t node) + (Naive_tree.data t node) + (Naive_tree.preorder t (Naive_tree.first_child t node)) + (Naive_tree.preorder t (Naive_tree.next_sibling t node)) + (Naive_tree.preorder t (Naive_tree.parent t node)) + done; + { data; table; tags ; kind } + +let load_xml_string s = of_naive (Naive_tree.load_xml_string s) +let load_xml_file f = of_naive (Naive_tree.load_xml_file f) + +let print_node fmt n = Format.fprintf fmt "%d" n diff --git a/src/compact_tree.mli b/src/compact_tree.mli new file mode 100644 index 0000000..d77557b --- /dev/null +++ b/src/compact_tree.mli @@ -0,0 +1,16 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +include Tree.S diff --git a/src/tatoo.ml b/src/tatoo.ml index 20bc903..5dec2f6 100644 --- a/src/tatoo.ml +++ b/src/tatoo.ml @@ -50,7 +50,7 @@ let main () = | Some input -> let fd = open_in input in fd, fun () -> close_in fd in - let d = time Naive_tree.load_xml_file fd "parsing xml document" in + let d = time Compact_tree.load_xml_file fd "parsing xml document" in close_fd (); d in let queries = @@ -104,10 +104,10 @@ let main () = Logger.msg `STATS "@[Automaton: @\n%a@]" Ata.print auto) auto_list; end; - let module Naive = Run.Make(Naive_tree)(Naive_node_list) in + let module Naive = Run.Make(Compact_tree)(Compact_node_list) in let result_list = - let root = Naive_node_list.create () in - let () = Naive_node_list.add (Naive_tree.root doc) root in + let root = Compact_node_list.create () in + let () = Compact_node_list.add (Compact_tree.root doc) root in let f, msg = match !Options.parallel, !Options.compose with true, true -> @@ -138,11 +138,11 @@ let main () = output_string output (string_of_int !count); output_string output "\" >\n"; if !Options.count then begin - output_string output (string_of_int (Naive_node_list.length results)); + output_string output (string_of_int (Compact_node_list.length results)); output_char output '\n'; end else - Naive_node_list.iter (fun n -> - Naive_tree.print_xml output doc n; + Compact_node_list.iter (fun n -> + Compact_tree.print_xml output doc n; output_char output '\n' ) results; output_string output "\n"; @@ -166,4 +166,6 @@ let () = Some s -> ("file " ^ s) | None -> "[stdin]") msg; exit 3 | Xpath.Ulexer.Error (s, e, msg) -> eprintf "Error: character %i-%i: %s\n%!" s e msg; exit 4 - | e -> eprintf "FATAL ERROR: %s\n%!" (Printexc.to_string e); exit 128 +(* | e -> Printexc.print_backtrace stderr; + flush stderr; + eprintf "FATAL ERROR: %s\n%!" (Printexc.to_string e); exit 128 *)