--- /dev/null
+(***********************************************************************)
+(* *)
+(* TAToo *)
+(* *)
+(* Kim Nguyen, LRI UMR8623 *)
+(* Université Paris-Sud & CNRS *)
+(* *)
+(* Copyright 2010-2016 Université Paris-Sud and Centre National de la *)
+(* Recherche Scientifique. All rights reserved. This file is *)
+(* distributed under the terms of the GNU Lesser General Public *)
+(* License, with the special exception on linking described in file *)
+(* ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+type node = int
+
+let dummy_tag = QName.of_string "#dummy"
+ (*
+open Bigarray
+*)
+
+(* type table = (int, int16_unsigned_elt, c_layout) Array1.t *)
+type table = int array
+type t = {
+ table : table;
+ kind : Bytes.t;
+ tags : QName.t array;
+ data : string array;
+}
+
+
+(* encoding :
+ i -> kind | QNameId lsl 8
+ i + 1 -> fc
+ i + 2 -> ns
+ i + 3 -> p
+*)
+
+let next i = i + 3
+let idx i = i / 3
+
+let dummy = 0
+let nil = next (dummy)
+let root _t = next (next dummy)
+
+let size t = (idx (Array.length t.table)) - 2
+
+let first_child t n =
+ Array.unsafe_get t.table (n + 0)
+
+let next_sibling t n =
+ Array.unsafe_get t.table (n + 1)
+
+let parent t n =
+ Array.unsafe_get t.table (n + 2)
+
+let tag t n =
+ Array.unsafe_get t.tags (idx n)
+
+let kind t n : Tree.NodeKind.t =
+ Obj.magic (String.unsafe_get t.kind (idx n))
+
+let preorder t n =
+ (idx n) - 2
+
+let data t n = Array.unsafe_get t.data (idx n)
+let by_preorder _ i = (i+2) / 3
+
+
+let output_escape_string out s =
+ for i = 0 to String.length s - 1 do
+ match s.[i] with
+ | '<' -> output_string out "<"
+ | '>' -> output_string out ">"
+ | '&' -> output_string out "&"
+ | '"' -> output_string out """
+ | '\'' -> output_string out "'"
+ | c -> output_char out c
+ done
+
+
+let rec print_attributes ?(sep=true) stop out tree node =
+ if (kind tree node == Tree.NodeKind.Attribute) && node != stop then
+ let tag = QName.to_string (tag tree node) in
+ if sep then output_char out ' ';
+ output_string out tag;
+ output_string out "=\"";
+ output_escape_string out (data tree node);
+ output_char out '\"';
+ print_attributes stop out tree (next_sibling tree node)
+ else
+ node
+
+let rec print_xml stop out tree node =
+ if node != nil && node != stop then
+ let () =
+ let open Tree.NodeKind in
+ match kind tree node with
+ | Node -> ()
+ | Text -> output_escape_string out (data tree node)
+ | Element | Document ->
+ let tag = QName.to_string (tag tree node)in
+ output_char out '<';
+ output_string out tag;
+ let fchild = print_attributes stop out tree (first_child tree node) in
+ if fchild == nil then output_string out "/>"
+ else begin
+ output_char out '>';
+ print_xml stop out tree fchild;
+ output_string out "</";
+ output_string out tag;
+ output_char out '>'
+ end
+ | Attribute -> ignore (print_attributes stop ~sep:false out tree node)
+ | Comment ->
+ output_string out "<!--";
+ output_string out (data tree node);
+ output_string out "-->"
+ | ProcessingInstruction ->
+ output_string out "<?";
+ output_string out (QName.to_string (tag tree node));
+ output_char out ' ';
+ output_string out (data tree node);
+ output_string out "?>"
+ in
+ print_xml stop out tree (next_sibling tree node)
+
+let print_xml out tree node =
+ print_xml (next_sibling tree node) out tree node
+
+
+let mk_node table_a data_a kind_a tags_a i kind tag data pre_fc pre_ns pre_p =
+ try
+ data_a.(i+2) <- data;
+ tags_a.(i+2) <- tag;
+ Bytes.unsafe_set kind_a (i+2) (Obj.magic kind);
+ let n = (i+2) * 3 in
+ let fc = (pre_fc + 2) * 3 in
+ let ns = (pre_ns + 2) * 3 in
+ let p = (pre_p + 2) * 3 in
+ table_a.(n) <- fc;
+ table_a.(n + 1) <- ns;
+ table_a.(n + 2) <- p
+ with _ -> assert false
+
+
+let of_naive t =
+ let s = Naive_tree.size t in
+ let len = (s + 2) * 3 in
+ let table = Array.make len ~-1 (* int16_unsigned c_layout len *) in
+ let data = Array.make (s + 2) "" in
+ let tags = Array.make (s+2) dummy_tag in
+ let kind = Bytes.make (s+2) '\000' in
+ mk_node table data kind tags ~-2 Tree.NodeKind.Element dummy_tag "" dummy dummy dummy;
+ mk_node table data kind tags ~-1 Tree.NodeKind.Element QName.nil "" nil nil nil;
+ for i = 0 to s - 1 do
+ let node = Naive_tree.by_preorder t i in
+ mk_node table data kind tags i (Naive_tree.kind t node)
+ (Naive_tree.tag t node)
+ (Naive_tree.data t node)
+ (Naive_tree.preorder t (Naive_tree.first_child t node))
+ (Naive_tree.preorder t (Naive_tree.next_sibling t node))
+ (Naive_tree.preorder t (Naive_tree.parent t node))
+ done;
+ { data; table; tags ; kind }
+
+let load_xml_string s = of_naive (Naive_tree.load_xml_string s)
+let load_xml_file f = of_naive (Naive_tree.load_xml_file f)
+
+let print_node fmt n = Format.fprintf fmt "%d" n
| Some input ->
let fd = open_in input in fd, fun () -> close_in fd
in
- let d = time Naive_tree.load_xml_file fd "parsing xml document" in
+ let d = time Compact_tree.load_xml_file fd "parsing xml document" in
close_fd (); d
in
let queries =
Logger.msg `STATS "@[Automaton: @\n%a@]" Ata.print auto) auto_list;
end;
- let module Naive = Run.Make(Naive_tree)(Naive_node_list) in
+ let module Naive = Run.Make(Compact_tree)(Compact_node_list) in
let result_list =
- let root = Naive_node_list.create () in
- let () = Naive_node_list.add (Naive_tree.root doc) root in
+ let root = Compact_node_list.create () in
+ let () = Compact_node_list.add (Compact_tree.root doc) root in
let f, msg =
match !Options.parallel, !Options.compose with
true, true ->
output_string output (string_of_int !count);
output_string output "\" >\n";
if !Options.count then begin
- output_string output (string_of_int (Naive_node_list.length results));
+ output_string output (string_of_int (Compact_node_list.length results));
output_char output '\n';
end else
- Naive_node_list.iter (fun n ->
- Naive_tree.print_xml output doc n;
+ Compact_node_list.iter (fun n ->
+ Compact_tree.print_xml output doc n;
output_char output '\n'
) results;
output_string output "</xml_result>\n";
Some s -> ("file " ^ s)
| None -> "[stdin]") msg; exit 3
| Xpath.Ulexer.Error (s, e, msg) -> eprintf "Error: character %i-%i: %s\n%!" s e msg; exit 4
- | e -> eprintf "FATAL ERROR: %s\n%!" (Printexc.to_string e); exit 128
+(* | e -> Printexc.print_backtrace stderr;
+ flush stderr;
+ eprintf "FATAL ERROR: %s\n%!" (Printexc.to_string e); exit 128 *)