From 451e60ad59e35344dff62da5ca27fcd5eec1bff9 Mon Sep 17 00:00:00 2001 From: kim Date: Thu, 30 Apr 2009 14:24:47 +0000 Subject: [PATCH] Commit before changing Tree.ml interface git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@365 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- Makefile | 6 +- OCamlDriver.cpp | 61 +++- SXSIStorageInterface.cpp | 15 +- SXSIStorageInterface.h | 2 + XMLDocShredder.cpp | 53 ++- ata.ml | 38 ++- debug.ml | 2 + hlist.ml | 3 +- hlist.mli | 1 + html_header.ml | 211 ------------ html_trace.ml | 268 +++++++++++++++ tag.ml | 10 +- tag.mli | 7 +- tests/test.xml | 9 +- tree.ml | 712 +++++++++------------------------------ tree.mli | 39 ++- 16 files changed, 584 insertions(+), 853 deletions(-) delete mode 100644 html_header.ml create mode 100644 html_trace.ml diff --git a/Makefile b/Makefile index 7b89c53..afa1e3b 100644 --- a/Makefile +++ b/Makefile @@ -53,9 +53,9 @@ endif ifeq ($(PROFILE), true) PROFILE_FLAGS = -p -SYNT_PROF = $(SYNT_DEBUG) -ppopt -DPROFILE +SYNT_PROF = -ppopt -DPROFILE endif - +SYNT_FLAGS = $(SYNT_DEBUG) $(SYNT_PROF) OPT_FLAGS = $(DEBUG_FLAGS) $(PROFILE_FLAGS) -nodynlink OCAMLOPT = ocamlopt -cc "$(CXX)" $(OPT_FLAGS) -ccopt -O3 -ccopt -std=c++0x -noassert -inline $(INLINE) @@ -67,7 +67,7 @@ OCAMLDEP = ocamldep #Ugly but seems difficult with a makefile LINK=$(OCAMLOPT) -linkpkg `ocamlc -version | grep -q "3.11.0" && echo dynlink.cmxa` camlp4lib.cmxa -SYNTAX= -syntax camlp4o $(PPINCLUDES) -ppopt pa_macro.cmo $(SYNT_PROF) +SYNTAX= -syntax camlp4o $(PPINCLUDES) -ppopt pa_macro.cmo $(SYNT_FLAGS) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 78d3e15..5b7e0b6 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -49,6 +49,7 @@ extern "C" { #define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) #define TEXTCOLLECTION(x) #define TREENODEVAL(i) ((treeNode) (Int_val(i))) +#define XMLTREE_ROOT 0 extern "C" { static struct custom_operations ops; @@ -143,7 +144,7 @@ void traversal_rec(XMLTree* tree, treeNode id){ extern "C" CAMLprim value caml_cpp_traversal(value tree){ CAMLparam1(tree); - traversal_rec(XMLTREE(tree),XMLTREE(tree)->Root()); + traversal_rec(XMLTREE(tree),XMLTREE_ROOT); CAMLreturn(Val_unit); } @@ -219,7 +220,7 @@ extern "C" CAMLprim value caml_text_collection_unsorted_contains(value tree,valu extern "C" CAMLprim value caml_xml_tree_root(value tree){ CAMLparam1(tree); - CAMLreturn (Val_int(TREENODEVAL(XMLTREE(tree)->Root()))); + CAMLreturn (Val_int(TREENODEVAL(XMLTREE_ROOT))); } extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){ CAMLparam1(tree); @@ -276,6 +277,17 @@ extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){ CAMLreturn(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id)))); } +extern "C" CAMLprim value caml_xml_tree_tagged_child(value tree, value id, value tag){ + CAMLparam3(tree,id,tag); + CAMLreturn(Val_int (XMLTREE(tree)->TaggedChild(TREENODEVAL(id),Int_val(tag)))); +} + +extern "C" CAMLprim value caml_xml_tree_tagged_sibling(value tree, value id, value tag){ + CAMLparam3(tree,id,tag); + CAMLreturn(Val_int (XMLTREE(tree)->TaggedFollSibling(TREENODEVAL(id),Int_val(tag)))); +} + + extern "C" CAMLprim value caml_xml_tree_is_leaf(value tree, value id){ CAMLparam2(tree,id); CAMLreturn(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id)))); @@ -397,14 +409,45 @@ extern "C" CAMLprim value caml_int_vector_alloc(value len){ } extern "C" CAMLprim value caml_int_vector_set(value vec, value i, value v){ - CAMLparam3(vec,i,v); - + CAMLparam3(vec,i,v); ((int*) vec)[Int_val(i)+1] = Int_val(v); CAMLreturn (Val_unit); } #define VECT(x) ((int*) (x)) +extern "C" CAMLprim value caml_xml_tree_select_desc(value tree, value node, value tags){ + CAMLparam3(tree,node,tags); + + CAMLreturn (Val_int (XMLTREE(tree)->SelectDesc(TREENODEVAL(node), + &(VECT(tags)[1]), + VECT(tags)[0]))); +} +extern "C" CAMLprim value caml_xml_tree_select_child(value tree, value node, value tags){ + CAMLparam3(tree,node,tags); + + CAMLreturn (Val_int (XMLTREE(tree)->SelectChild(TREENODEVAL(node), + &(VECT(tags)[1]), + VECT(tags)[0]))); +} +extern "C" CAMLprim value caml_xml_tree_select_foll_sibling(value tree, value node, value tags){ + CAMLparam3(tree,node,tags); + + CAMLreturn (Val_int (XMLTREE(tree)->SelectFollSibling(TREENODEVAL(node), + &(VECT(tags)[1]), + VECT(tags)[0]))); +} +extern "C" CAMLprim value caml_xml_tree_select_foll_below(value tree, value node, value tags,value ctx){ + CAMLparam4(tree,node,tags,ctx); + + CAMLreturn (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node), + &(VECT(tags)[1]), + VECT(tags)[0],Int_val(ctx)))); +} + + + +/* extern "C" CAMLprim value caml_xml_tree_select_below(value tree, value node, value ctags, value dtags){ CAMLparam4(tree,node,ctags,dtags); @@ -414,8 +457,9 @@ extern "C" CAMLprim value caml_xml_tree_select_below(value tree, value node, val VECT(ctags)[0], &(VECT(dtags)[1]), VECT(dtags)[0])))); -} - + } +*/ +/* extern "C" CAMLprim value caml_xml_tree_select_next(value tree, value node, value ctags, value ftags,value root){ CAMLparam5(tree,node,ctags,ftags,root); CAMLreturn (Val_int ( @@ -426,7 +470,8 @@ extern "C" CAMLprim value caml_xml_tree_select_next(value tree, value node, valu VECT(ftags)[0], TREENODEVAL(root))))); } - +*/ +/* extern "C" CAMLprim value caml_xml_tree_select_desc_only(value tree, value node,value dtags){ CAMLparam3(tree,node,dtags); @@ -453,7 +498,7 @@ extern "C" CAMLprim value caml_xml_tree_select_desc_or_foll_only(value tree, val VECT(ftags)[0], TREENODEVAL(root))))); } - +*/ extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){ CAMLparam2(tree,node); CAMLlocal1(tuple); diff --git a/SXSIStorageInterface.cpp b/SXSIStorageInterface.cpp index 88ed42f..0a1a715 100644 --- a/SXSIStorageInterface.cpp +++ b/SXSIStorageInterface.cpp @@ -13,8 +13,9 @@ SXSIStorageInterface::SXSIStorageInterface(int sf,bool iet,bool dtc) { - tree = new XMLTree(); - tree->OpenDocument(iet,sf,dtc); + tree = NULL; + tb = new XMLTreeBuilder(); + tb ->OpenDocument(iet,sf,dtc); } SXSIStorageInterface::~SXSIStorageInterface() @@ -24,7 +25,7 @@ SXSIStorageInterface::~SXSIStorageInterface() void SXSIStorageInterface::newChild(string name) { _new_child++; - tree->NewOpenTag((unsigned char*) name.c_str()); + tb->NewOpenTag((unsigned char*) name.c_str()); } @@ -33,26 +34,26 @@ void SXSIStorageInterface::newText(string text) if (text.empty()) { _new_empty_text++; - tree->NewEmptyText(); + tb->NewEmptyText(); } else { _new_text++; _length_text += text.size(); - tree->NewText((unsigned char*) text.c_str()); + tb->NewText((unsigned char*) text.c_str()); } } void SXSIStorageInterface::nodeFinished(string name) { - tree->NewClosingTag((unsigned char*) name.c_str()); + tb->NewClosingTag((unsigned char*) name.c_str()); } void SXSIStorageInterface::parsingFinished() { - tree->CloseDocument(); + tree = tb->CloseDocument(); } diff --git a/SXSIStorageInterface.h b/SXSIStorageInterface.h index 943a000..bd65594 100644 --- a/SXSIStorageInterface.h +++ b/SXSIStorageInterface.h @@ -11,6 +11,7 @@ #define SXSISTORAGEINTERFACE_H_ #include "XMLTree.h" +#include "XMLTreeBuilder.h" #include "StorageInterface.h" extern "C" { #include @@ -40,6 +41,7 @@ class SXSIStorageInterface: public StorageInterface private: XMLTree* tree; + XMLTreeBuilder* tb; int _new_text; int _new_empty_text; int _new_child; diff --git a/XMLDocShredder.cpp b/XMLDocShredder.cpp index c048d2e..1516c9f 100644 --- a/XMLDocShredder.cpp +++ b/XMLDocShredder.cpp @@ -88,9 +88,6 @@ void XMLDocShredder::processStartElement() ustring name = reader_->get_name(); bool empty = false; - storageIfc_->newText(buffer); //prevText - buffer.erase(); - storageIfc_->newChild(name); /* We must be really carefull here. calling process attributes moves @@ -108,8 +105,7 @@ void XMLDocShredder::processStartElement() if (empty){ - storageIfc_->newText(""); //myText - storageIfc_->nodeFinished(name); + storageIfc_->nodeFinished(name); }; @@ -117,21 +113,19 @@ void XMLDocShredder::processStartElement() void XMLDocShredder::processEndElement() { - // tell the storage interface that the current node has been completely processed - storageIfc_->newText(buffer); //prevText - buffer.erase(); + // tell the storage interface that the current node has been completely processed storageIfc_->nodeFinished(reader_->get_name()); } void XMLDocShredder::processPCDATA() { - // send the content of this PCDATA node to the storage interface as a text node - - if (reader_->has_value()) - { - buffer += reader_->get_value(); - }; - + // send the content of this PCDATA node to the storage interface as a text node + + if (reader_->has_value()){ + storageIfc_->newChild("<$>"); + storageIfc_->newText(reader_->get_value()); + storageIfc_->nodeFinished("<$>"); + }; } void XMLDocShredder::processAttributes() @@ -139,7 +133,6 @@ void XMLDocShredder::processAttributes() reader_->move_to_first_attribute(); string nspaceStr = "xmlns"; - storageIfc_->newText(""); //prevText storageIfc_->newChild("<@>"); do { @@ -163,22 +156,26 @@ void XMLDocShredder::processAttributes() else { - storageIfc_->newText(""); //prevText - storageIfc_->newChild(name); + string attname = "<@>"+name; + storageIfc_->newChild(attname); + storageIfc_->newChild("<@$>"); storageIfc_->newText(value); - storageIfc_->nodeFinished(name); + storageIfc_->nodeFinished("<@$>"); + storageIfc_->nodeFinished(attname); } } while (reader_->move_to_next_attribute()); - storageIfc_->newText(""); //nextText storageIfc_->nodeFinished("<@>"); } void XMLDocShredder::processSignificantWhitespace() { - // each significant whitespace sequence constructs a text node - buffer += reader_->get_value(); - + + if (reader_->has_value()){ + storageIfc_->newChild("<$>"); + storageIfc_->newText(reader_->get_value()); + storageIfc_->nodeFinished("<$>"); + }; } void XMLDocShredder::processStartDocument(const string docName) @@ -192,7 +189,6 @@ void XMLDocShredder::processEndDocument() { /* tell the storage interface that document parsing has finished, and structures * can now be written to disk. */ - storageIfc_->newText(""); storageIfc_->nodeFinished(""); storageIfc_->parsingFinished(); } @@ -231,10 +227,11 @@ void XMLDocShredder::processCDATASection() * model. Instead, we simply pass the converted text value to the storage interface as * a text node attached to the current context node. */ - - ustring value = reader_->get_value(); - storageIfc_->newText(value); - last_text = true; + if (reader_->has_value()){ + storageIfc_->newChild("<$>"); + storageIfc_->newText(reader_->get_value()); + storageIfc_->nodeFinished("<$>"); + }; } diff --git a/ata.ml b/ata.ml index 79ffe6c..e06ac04 100644 --- a/ata.ml +++ b/ata.ml @@ -525,7 +525,6 @@ THEN INCLUDE "html_trace.ml" END - let mk_fun f s = D_IGNORE_(register_funname f s,f) let mk_app_fun f arg s = let g = f arg in D_IGNORE_(register_funname g ((get_funname f) ^ " " ^ s), g) @@ -555,22 +554,22 @@ END let choose_jump_down a b c d = choose_jump a b c d (mk_fun (Tree.mk_nil) "Tree.mk_nil") - (mk_fun (Tree.text_below) "Tree.text_below") - (mk_fun (fun _ -> Tree.node_child) "[TaggedChild]Tree.node_child") (* !! no tagged_child in Tree.ml *) - (mk_fun (fun _ -> Tree.node_child) "[SelectChild]Tree.node_child") (* !! no select_child in Tree.ml *) + (mk_fun (Tree.first_child) "Tree.text_below") + (mk_fun (Tree.tagged_child) "Tree.tagged_child") + (mk_fun (Tree.select_child) "Tree.select_child") (* !! no select_child in Tree.ml *) (mk_fun (Tree.tagged_desc) "Tree.tagged_desc") - (mk_fun (fun _ -> Tree.node_child ) "[SelectDesc]Tree.node_child") (* !! no select_desc *) - (mk_fun (Tree.node_child) "Tree.node_child") + (mk_fun (Tree.select_desc) "Tree.select_desc") (* !! no select_desc *) + (mk_fun (Tree.first_child) "Tree.first_child") let choose_jump_next a b c d = choose_jump a b c d (mk_fun (fun t _ -> Tree.mk_nil t) "Tree.mk_nil2") - (mk_fun (Tree.text_next) "Tree.text_next") - (mk_fun (fun _ -> Tree.node_sibling_ctx) "[TaggedSibling]Tree.node_sibling_ctx")(* !! no tagged_sibling in Tree.ml *) - (mk_fun (fun _ -> Tree.node_sibling_ctx) "[SelectSibling]Tree.node_sibling_ctx")(* !! no select_sibling in Tree.ml *) + (mk_fun (Tree.next_sibling_ctx) "Tree.text_next") + (mk_fun (Tree.tagged_sibling_ctx) "Tree.tagged_sibling_ctx")(* !! no tagged_sibling in Tree.ml *) + (mk_fun (Tree.select_sibling_ctx) "Tree.select_sibling_ctx")(* !! no select_sibling in Tree.ml *) (mk_fun (Tree.tagged_foll_ctx) "Tree.tagged_foll_ctx") - (mk_fun (fun _ -> Tree.node_sibling_ctx) "[SelectFoll]Tree.node_sibling_ctx")(* !! no select_foll *) - (mk_fun (Tree.node_sibling_ctx) "Tree.node_sibling_ctx") + (mk_fun (Tree.select_foll_ctx) "Tree.select_foll_ctx")(* !! no select_foll *) + (mk_fun (Tree.next_sibling_ctx) "Tree.node_sibling_ctx") module SetTagKey = @@ -621,7 +620,7 @@ END in let null_result() = (pempty,Array.make slot_size RS.empty) in - let rec loop t slist ctx = + let rec loop t slist ctx = if Tree.is_nil t then null_result() else get_trans t slist (Tree.tag t) ctx and loop_tag tag t slist ctx = @@ -673,7 +672,8 @@ END let empty_res = null_result() in let cont = match f_kind,n_kind with - | `NIL,`NIL -> (fun _ _ -> null_result()) + | `NIL,`NIL -> + (fun _ _ -> eval_fold2_slist fl_list t empty_res empty_res ) | _,`NIL -> ( match f_kind with |`TAG(tag) -> @@ -682,12 +682,12 @@ END | `ANY -> (fun t _ -> eval_fold2_slist fl_list t empty_res (loop (first t) llist t)) - | _ -> assert false) + | _ -> assert false) | `NIL,_ -> ( match n_kind with |`TAG(tag) -> - (fun t ctx -> eval_fold2_slist fl_list t + (fun t ctx -> eval_fold2_slist fl_list t (loop_tag tag (next t ctx) rlist ctx) empty_res) | `ANY -> @@ -717,6 +717,12 @@ END (loop (next t ctx) rlist ctx) (loop (first t) llist t) ) | _ -> assert false + in + let cont = D_IF_( (fun t ctx -> + let a,b = cont t ctx in + register_trace t (slist,a,fl_list,first,next,ctx); + (a,b) + ) ,cont) in (CachedTransTable.add td_trans (tag,slist) cont;cont) in cont t ctx @@ -934,7 +940,7 @@ END | `TAG (tag) -> (*Tree.tagged_lowest t tag, fun tree -> Tree.tagged_next tree tag*) (Tree.tagged_desc tag t, fun tree -> Tree.tagged_foll_ctx tag tree t) - | `CONTAINS(_) -> (Tree.text_below t,fun tree -> Tree.text_next tree t) + | `CONTAINS(_) -> (Tree.first_child t,fun tree -> Tree.next_sibling_ctx tree t) | _ -> assert false in let tree2 = jump_fun tree1 in diff --git a/debug.ml b/debug.ml index 39b4fcf..7ae4e75 100644 --- a/debug.ml +++ b/debug.ml @@ -18,10 +18,12 @@ THEN module Loc = Camlp4.PreCast.Loc DEFINE D_IGNORE_(e1,e2) = (let () = e1 in ();e2) +DEFINE D_IF_(e1,e2) = e1 ELSE DEFINE D_IGNORE_(e1,e2) = (e2) +DEFINE D_IF_(e1,e2) = e2 END (* IFDEF DEBUG *) diff --git a/hlist.ml b/hlist.ml index cd3c90d..4b83668 100644 --- a/hlist.ml +++ b/hlist.ml @@ -16,6 +16,7 @@ module type S = sig val iter : (elt -> 'a) -> t -> unit val rev : t -> t val rev_map : (elt -> elt) -> t -> t + val length : t -> int end module Make ( H : Hcons.S ) : S with type elt = H.t = @@ -78,5 +79,5 @@ struct let rev l = fold cons l nil let rev_map f l = fold (fun x acc -> cons (f x) acc) l nil - + let length l = fold (fun _ c -> c+1) l 0 end diff --git a/hlist.mli b/hlist.mli index 15bafe5..7796833 100644 --- a/hlist.mli +++ b/hlist.mli @@ -15,6 +15,7 @@ module type S = sig val iter : (elt -> 'a) -> t -> unit val rev : t -> t val rev_map : (elt -> elt) -> t -> t + val length : t -> int end module Make (H : Hcons.S) : S with type elt = H.t diff --git a/html_header.ml b/html_header.ml deleted file mode 100644 index f2b1a8e..0000000 --- a/html_header.ml +++ /dev/null @@ -1,211 +0,0 @@ -let html_header = format_of_string - " - - - - - - - - -" -let html_footer = " - -" - let h_trace = Hashtbl.create 4096 - let register_trace t x = Hashtbl.add h_trace (Tree.id t) x - - - let output_trace a t file results = - let h_auto = 6+ (Hashtbl.fold (fun _ l a -> (List.length l)+a) a.phi 0) in - let max_tt = ref 0 in - let outc = open_out file in - let outf = Format.formatter_of_out_channel outc in - let strf = Format.str_formatter in - let pr_str x = Format.fprintf strf x in - let pr_out x = Format.fprintf outf x in - let rec loop t = - if not (Tree.is_nil t) then - let tooltip,selected = try - let (inconf,outconf,leftres,rightres,trans) = Hashtbl.find h_trace (Tree.id t) in - let selected = IntSet.mem (Tree.id t) results in - pr_str "
Subtree %i, tag='%s', internal node = %s\nEntered with configuration:\n" - (Tree.id t) (Tree.id t) (Tag.to_string (Tree.tag t)) (Tree.dump_node t); - iter_pl (fun s -> pr_st strf (Ptset.elements s)) inconf; - pr_str "%s" "\nLeft with configuration:\n"; - iter_pl (fun s -> pr_st strf (Ptset.elements s)) outconf; - pr_str "%s" "\nAccept states for left child:\n"; - iter_pl (fun s -> pr_st strf (Ptset.elements s)) leftres; - pr_str "%s" "\nAccept states for right child:\n"; - iter_pl (fun s -> pr_st strf (Ptset.elements s)) rightres; - pr_str "%s" "\nTriggered transitions:\n"; - pr_str "%s" ""; - List.iter (fun fl -> - pr_str "%s" ""; - max_tt := max !max_tt (form_list_length fl); - ) trans; - pr_str "%s" "
";pr_frmlst strf fl;pr_str "
\n"; - pr_str "In result set : %s\n
" (if selected then "Yes" else "No"); - Format.flush_str_formatter(),selected - with - Not_found -> "",false - in - let tag = Tree.tag t in - let div_class = (if (tooltip = "") then "skipped" else (if selected then "selected" else "touched"))^ - (if tag == Tag.pcdata then "_text" else"") - in - if tag == Tag.pcdata then - pr_out "
%s%s
"div_class (Tree.get_text t) tooltip - else begin - if (Tree.is_nil (Tree.first_child t)) - then - pr_out "
<%s/>%s
" - div_class (Tree.id t) (Tree.id t) (Tag.to_string tag) tooltip - else begin - pr_out "
<%s>%s
" - div_class (Tree.id t) (Tree.id t) (Tag.to_string tag) tooltip; - loop (Tree.first_child t); - pr_out "
</%s>
" div_class (Tag.to_string tag); - end; - end; - loop (Tree.next_sibling t); - in - let max_tt = 25*(!max_tt + 12)+20 in - let height = max max_tt (25*h_auto) in - pr_out html_header height height height height; - pr_out "%s" "
"; - dump outf a; - pr_out "%s" "
"; - pr_out "%s" "
"; - loop t; - pr_out "%s" html_footer; - pr_out "%!"; - close_out outc diff --git a/html_trace.ml b/html_trace.ml new file mode 100644 index 0000000..c5ed980 --- /dev/null +++ b/html_trace.ml @@ -0,0 +1,268 @@ +let html_header = format_of_string + " + + + + + + + + +" +let html_footer = "
+ +" +let h_trace = Hashtbl.create 4096 +let register_trace t x = Hashtbl.add h_trace (Tree.id t) x +let h_fname = Hashtbl.create 401 + +let register_funname f s = Hashtbl.add h_fname (Hashtbl.hash f) s +let get_funname f = try Hashtbl.find h_fname (Hashtbl.hash f) with _ -> "[anon_fun]" +let tag_to_str tag = + let s = Tag.to_string tag in + let num =ref 0 in + for i=0 to (String.length s)-1 do + match s.[i] with + | '<' | '>' -> incr num + | _ -> () + done; + if !num == 0 then s + else + let j = ref 0 in + let ns = String.create ((String.length s)+3 * !num) in + for i=0 to (String.length s)-1 do + match s.[i] with + | '<' | '>' as x -> + ns.[!j] <- '&'; + ns.[!j+1] <- (if x == '>' then 'g' else 'l') ; + ns.[!j+2] <- 't'; + ns.[!j+3] <- ';'; + j:= !j+4 + | _ -> ns.[!j] <- s.[i]; incr j + done; + ns + + +let output_trace a t file results = + let h_auto = 6+ (Hashtbl.fold (fun _ l a -> (List.length l)+a) a.trans 0) in + let max_tt = ref 0 in + let outc = open_out file in + let outf = Format.formatter_of_out_channel outc in + let strf = Format.str_formatter in + let pr_str x = Format.fprintf strf x in + let pr_out x = Format.fprintf outf x in + let rec loop t = + if not (Tree.is_nil t) then + let id = Tree.id t in + let tag = Tree.tag t in + let tooltip,selected = try + let (inconf,outconf,trans,first_fun,next_fun,ctx) = Hashtbl.find h_trace id in + let selected = IntSet.mem id results in + pr_str "
Subtree %i, tag='%s', internal node = %s\n" + id id (tag_to_str tag) (Tree.dump_node t); + + pr_str "Context node is %i, tag='%s', internal node = '%s'\n" + (Tree.id ctx) (tag_to_str (Tree.tag ctx)) (Tree.dump_node ctx); + pr_str "%s" "\nEntered with configuration:\n"; + SList.iter (fun s -> StateSet.print strf s) inconf; + pr_str "%s" "\nLeft with configuration:\n"; + SList.iter (fun s -> StateSet.print strf s) outconf; + (let ft = first_fun t in + pr_str "\nLeft successor is: id=%i, tag='%s', internal node = '%s'\n" + (Tree.id ft) (Tree.id ft) (Tree.id ft) (tag_to_str (Tree.tag ft)) (Tree.dump_node ft); + pr_str "Moving with : %s (tree=%i)\n" (get_funname first_fun) id; + ); + (let nt = next_fun t ctx in + pr_str "\nRight successor is: id=%i, tag='%s', internal node = '%s'\n" + (Tree.id nt) (Tree.id nt) (Tree.id nt) (tag_to_str (Tree.tag nt)) (Tree.dump_node nt); + pr_str "Moving with : %s (tree=%i) (ctx=%i)\n" (get_funname first_fun) id (Tree.id ctx); + ); + pr_str "%s" "\nTriggered transitions:\n"; + pr_str "%s" ""; + List.iter (fun fl -> + pr_str "%s" ""; + max_tt := max !max_tt (Formlist.length fl); + ) trans; + pr_str "%s" "
";Formlist.print strf fl;pr_str "
\n"; + pr_str "In result set : %s\n
" (if selected then "Yes" else "No"); + Format.flush_str_formatter(),selected + with + Not_found -> "",false + in + let div_class = (if (tooltip = "") then "skipped" else (if selected then "selected" else "touched"))^ + (if tag == Tag.pcdata || tag== Tag.attribute_data then "_text" else"") + in + if tag == Tag.pcdata || tag== Tag.attribute_data then + pr_out "" div_class id (Tree.get_text t) tooltip + else begin + if (Tree.is_nil (Tree.first_child t)) + then + pr_out "" + div_class id id id (tag_to_str tag) tooltip + else begin + pr_out "" + div_class id id id (tag_to_str tag) tooltip; + loop (Tree.first_child t); + if (tooltip="") then + pr_out "
</%s>
" div_class (tag_to_str tag) + else + pr_out "
</%s>
" id id div_class (tag_to_str tag); + end; + end; + loop (Tree.next_sibling t); + in + let max_tt = 25*(!max_tt + 15)+20 in + let height = max max_tt (25*h_auto) in + pr_out html_header height height height height; + pr_out "%s" "
"; + pr_out "query: %s\n" a.query_string; + dump outf a; + pr_out "%s" "

"; + pr_out "%s" "
"; + loop t; + pr_out "%s" html_footer; + pr_out "%!"; + close_out outc + diff --git a/tag.ml b/tag.ml index c500dab..f9c0275 100644 --- a/tag.ml +++ b/tag.ml @@ -17,8 +17,11 @@ external tag_name : pool -> t -> string = "caml_xml_tree_tag_name" let nullt = null_tag () (* Defined in XMLTree.cpp *) -let pcdata = 1 -let attribute = 0 +let document_node = 0 +let attribute = 1 +let pcdata = 2 +let attribute_data= 3 + let pool = Weak.create 1 @@ -31,6 +34,8 @@ let get_pool () = match Weak.get pool 0 with let tag s = match s with | "<$>" -> pcdata | "<@>" -> attribute + | "" -> document_node + | "<@$>" -> attribute_data | _ -> register_tag (get_pool()) s let compare = (-) @@ -41,6 +46,7 @@ let hash x = x let to_string t = if t == pcdata then "<$>" + else if t == attribute_data then "<@$>" else if t == attribute then "<@>" else if t == nullt then "" else tag_name (get_pool()) t diff --git a/tag.mli b/tag.mli index b5e0ad2..bc3ee55 100644 --- a/tag.mli +++ b/tag.mli @@ -1,8 +1,13 @@ type t = int type pool val tag : string -> t -val pcdata : t + +val document_node : t val attribute : t +val pcdata : t +val attribute_data : t + + val init : pool -> unit val to_string : t -> string val compare : t -> t -> int diff --git a/tests/test.xml b/tests/test.xml index f1dfe41..4c57628 100644 --- a/tests/test.xml +++ b/tests/test.xml @@ -1,7 +1,4 @@ - - - - - - +12 + +3 diff --git a/tree.ml b/tree.ml index 1ed56b2..e3e8fe2 100644 --- a/tree.ml +++ b/tree.ml @@ -41,7 +41,7 @@ external text_count_contains : tree -> string -> int = "caml_text_collection_cou external text_count : tree -> string -> int = "caml_text_collection_count" external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains" external text_unsorted_contains : tree -> string -> unit = "caml_text_collection_unsorted_contains" -external get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text" +external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text" external tree_serialize : tree -> string -> unit = "caml_xml_tree_serialize" @@ -56,7 +56,10 @@ external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_par external tree_parent_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc" external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" +external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" +external tree_tagged_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_sibling" + external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" external tree_is_leaf : tree -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" external tree_last_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_last_child" @@ -76,10 +79,10 @@ external tree_doc_ids : tree -> [`Tree ] node -> [`Text ] node * [`Text ] node = let text_size tree = int_of_node (snd ( tree_doc_ids tree (Obj.magic 0) )) -let get_cached_text t x = +let text_get_cached_text t x = if x == -1 then "" else - get_cached_text t x + text_get_cached_text t x external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" @@ -88,24 +91,41 @@ external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "ca external tree_tagged_desc : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc" external tree_tagged_foll_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below" external tree_subtree_tags : tree -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" -(* -external tree_select_below : tree -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_select_below" -external tree_select_desc_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_select_desc_only" -external tree_select_next : tree -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_next" -external tree_select_foll_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_foll_only" -external tree_select_desc_or_foll_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_foll_only" *) - -type descr = - | Nil - | Node of [`Tree] node - | Text of [`Text] node * [`Tree] node + + + +type int_vector +external int_vector_alloc : int -> int_vector = "caml_int_vector_alloc" +external int_vector_length : int_vector -> int = "caml_int_vector_length" +external int_vector_set : int_vector -> int -> int -> unit = "caml_int_vector_set" + +external tree_select_child : tree -> [`Tree ] node -> int_vector -> [`Tree] node = "caml_xml_tree_select_child" +external tree_select_foll_sibling : tree -> [`Tree ] node -> int_vector -> [`Tree] node = "caml_xml_tree_select_foll_sibling" +external tree_select_desc : tree -> [`Tree ] node -> int_vector -> [`Tree] node = "caml_xml_tree_select_desc" +external tree_select_foll_below : tree -> [`Tree ] node -> int_vector -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_foll_below" + + +module HPtset = Hashtbl.Make(Ptset.Int) + +let vector_htbl = HPtset.create MED_H_SIZE + +let ptset_to_vector s = + try + HPtset.find vector_htbl s + with + Not_found -> + let v = int_vector_alloc (Ptset.Int.cardinal s) in + let _ = Ptset.Int.fold (fun e i -> int_vector_set v i e;i+1) s 0 in + HPtset.add vector_htbl s v; v + type t = { doc : tree; - node : descr; + node : [`Tree] node; ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; } let text_size t = text_size t.doc + module MemUnion = Hashtbl.Make (struct type t = Ptset.Int.t*Ptset.Int.t let equal (x,y) (z,t) = (Ptset.Int.equal x z)&&(Ptset.Int.equal y t) @@ -137,14 +157,13 @@ let collect_tags tree = Hashtbl.add h_add k r;r in let h = Hashtbl.create BIG_H_SIZE in - let sing = Ptset.Int.singleton Tag.pcdata in let update t sb sa = let sbelow,safter = try Hashtbl.find h t with | Not_found -> - (sing,sing) + (Ptset.Int.empty,Ptset.Int.empty) in Hashtbl.replace h t (pt_cup sbelow sb, pt_cup safter sa) in @@ -158,9 +177,7 @@ let collect_tags tree = update tag below1 after2; pt_add tag (pt_cup below1 below2), (pt_add tag after1) in - let b,a = loop (tree_root tree) Ptset.Int.empty in - update Tag.pcdata b a; - h + let _ = loop (tree_root tree) Ptset.Int.empty in h @@ -197,7 +214,7 @@ let init_naive_contains t s = let rec loop n acc l = if n >= j then acc,l else - let s = get_cached_text t.doc n + let s = text_get_cached_text t.doc n in if matching s then loop (n+1) (n::acc) (l+1) @@ -216,27 +233,16 @@ module DocIdSet = struct let compare = compare_node end) end -let is_nil t = t.node == Nil +let is_nil t = t.node == nil -let is_node t = t.node != Nil +let is_node t = t.node != nil let node_of_t t = let _ = Tag.init (Obj.magic t) in let table = collect_tags t in -(* - let _ = Hashtbl.iter (fun t (sb,sa) -> - Printf.eprintf "'%s' -> { " (Tag.to_string t); - Ptset.iter (fun i -> Printf.eprintf "'%s' " (Tag.to_string i)) sb; - Printf.eprintf "}\n { "; - Ptset.iter (fun i -> Printf.eprintf "'%s' " (Tag.to_string i)) sa; - Printf.eprintf "} \n----------------------------------\n"; - ) table in - let i,j = tree_doc_ids t (tree_root t) in - Printf.eprintf "%i docs, range from %i to %i\n%!" (Array.length s) i j; - Array.iter (fun i -> print_endline (">>>" ^ i ^ "<<<")) s; *) { doc= t; - node = Node(tree_root t); + node = tree_root t; ttable = table; } let finalize _ = Printf.eprintf "Release the string list !\n%!" @@ -266,257 +272,49 @@ let load ?(sample=64) str = let tag_pool t = pool t.doc -let compare a b = match a.node,b.node with - | Nil, Nil -> 0 - | Nil,_ -> 1 - | _ , Nil -> -1 - | Node(i),Node(j) -> compare_node i j - | Text(i,_), Text(j,_) -> compare_node i j - | Node(i), Text(_,j) -> compare_node i j - | Text(_,i), Node(j) -> compare_node i j - -let equal a b = (compare a b) == 0 - - -let norm (n : [`Tree ] node ) = if n == -1 then Nil else Node (n) - +let compare a b = a.node - b.node + +let equal a b = a.node == b.node + let nts = function - Nil -> "Nil" - | Text (i,j) -> Printf.sprintf "Text (%i, %i)" i j - | Node (i) -> Printf.sprintf "Node (%i)" i + -1 -> "Nil" + | i -> Printf.sprintf "Node (%i)" i let dump_node t = nts t.node -let mk_nil t = { t with node = Nil } -let root n = { n with node = norm (tree_root n.doc) } +let mk_nil t = { t with node = nil } +let root n = { n with node = tree_root n.doc } -let is_root n = match n.node with - | Node(t) -> (int_of_node t) == 0 - | _ -> false - -let is_left n = match n.node with - | Node(t) -> (tree_is_first_child n.doc t) && (equal_node nil (tree_prev_text n.doc t)) - | Text(_,t) -> tree_is_nil t || tree_is_first_child n.doc t - | _ -> false - -let is_below_right t1 t2 = - match (t1.node,t2.node) with - | Nil,_ | _,Nil -> false - | Node(i1), Node(i2) -> - tree_is_ancestor t1.doc (tree_parent t1.doc i1) i2 - && not (tree_is_ancestor t1.doc i1 i2) - | Text(_,i1),Node(i2) -> i1 == i2 || - (tree_is_ancestor t1.doc (tree_parent t1.doc i1) i2 && i1 < i2) - | Text(_,i1),Text(i,_) -> - let x,y = tree_doc_ids t1.doc i1 in - i >= x && i <= y - | Node(i1), Text(i,_) -> - let i2 = tree_next_sibling t1.doc i1 in - let x,y = tree_doc_ids t1.doc i2 in - i >= x && i <= y - -let parent n = - let node' = - match n.node with (* inlined parent *) - | Node(t) when (int_of_node t)== 0 -> Nil - | Node(t) -> - let txt = tree_prev_text n.doc t in - if text_is_empty n.doc txt then - let ps = tree_prev_sibling n.doc t in - if tree_is_nil ps - then - Node(tree_parent n.doc t) - else Node(ps) - else - Text(txt,t) - | Text(i,t) -> - let ps = tree_prev_doc n.doc i in - if tree_is_nil ps - then Node (tree_parent_doc n.doc i) - else Node(ps) - | _ -> failwith "parent" - in - { n with node = node' } - -let node_child n = - match n.node with - | Node i -> { n with node= norm(tree_first_child n.doc i) } - | _ -> { n with node = Nil } - -let node_sibling n = - match n.node with - | Node i -> { n with node= norm(tree_next_sibling n.doc i) } - | _ -> { n with node = Nil } - -let node_sibling_ctx n _ = - match n.node with - | Node i -> { n with node= norm(tree_next_sibling n.doc i) } - | _ -> { n with node = Nil } - - -let first_child n = - let node' = - match n.node with - | Node (t) -> - let fs = tree_first_child n.doc t in - if equal_node nil fs - then - let txt = tree_my_text n.doc t in - if equal_node nil txt - then Nil - else Text(txt,nil) - else - let txt = tree_prev_text n.doc fs in - if equal_node nil txt - then Node(fs) - else Text(txt, fs) - | Text(_,_) -> Nil - | Nil -> failwith "first_child" - in - { n with node = node'} +let is_root n = n.node == (tree_root n.doc) -let next_sibling n = - let node' = - match n.node with - | Text (_,ns) -> norm ns - | Node(t) -> - let ns = tree_next_sibling n.doc t in - let txt = tree_next_text n.doc t in - if equal_node nil txt - then norm ns - else Text(txt, ns) - | Nil -> failwith "next_sibling" - in - { n with node = node'} - -let next_sibling_ctx n _ = next_sibling n - -let left = first_child -let right = next_sibling - -let id t = - match t.node with - | Node(n) -> tree_node_xml_id t.doc n - | Text(i,_) -> tree_text_xml_id t.doc i - | _ -> -1 - -let tag t = - match t.node with - | Text(_) -> Tag.pcdata - | Node(n) -> tree_tag_id t.doc n - | Nil -> Tag.nullt - -(* -let select_next tb tf t s = - match s.node with - | Node (below) -> begin - match t.node with - | Node( n) -> - { t with node = norm (tree_select_next t.doc n (Ptset.Int.to_int_vector tb) (Ptset.Int.to_int_vector tf) below) } - | Text (i,n) when equal_node nil n -> - let p = tree_parent_doc t.doc i in - { t with node = norm (tree_select_next t.doc p (Ptset.Int.to_int_vector tb) (Ptset.Int.to_int_vector tf) below) } - | Text(_,n) -> - if Ptset.mem (tree_tag_id t.doc n) (Ptset.Int.union tb tf) - then { t with node=Node(n) } - else - let vb = Ptset.Int.to_int_vector tb in - let vf = Ptset.Int.to_int_vector tf in - let node = - let dsc = tree_select_below t.doc n vb vf in - if equal_node nil dsc - then tree_select_next t.doc n vb vf below - else dsc - in - { t with node = norm node } - | _ -> {t with node = Nil } - end - - | _ -> { t with node = Nil } +let is_left n = tree_is_first_child n.doc n.node - +let is_below_right t1 t2 = tree_is_ancestor t1.doc (tree_parent t1.doc t1.node) t2.node +let parent n = { n with node = tree_parent n.doc n.node } - let select_foll_only tf t s = - match s.node with - | Node (below) -> - begin - match t.node with - | Node(n) -> - { t with node= norm (tree_select_foll_only t.doc n (Ptset.Int.to_int_vector tf) below) } - | Text(i,n) when equal_node nil n -> - let p = tree_parent_doc t.doc i in - { t with node= norm (tree_select_foll_only t.doc p (Ptset.Int.to_int_vector tf) below) } - | Text(_,n) -> - if Ptset.mem (tree_tag_id t.doc n) tf - then { t with node=Node(n) } - else - let vf = Ptset.Int.to_int_vector tf in - let node = - let dsc = tree_select_desc_only t.doc n vf in - if tree_is_nil dsc - then tree_select_foll_only t.doc n vf below - else dsc - in - { t with node = norm node } - | _ -> { t with node = Nil } - end - | _ -> {t with node=Nil } - -let select_below tc td t= - match t.node with - | Node( n) -> - let vc = Ptset.Int.to_int_vector tc - in - let vd = Ptset.Int.to_int_vector td - in - { t with node= norm(tree_select_below t.doc n vc vd) } - | _ -> { t with node=Nil } - - -let select_desc_only td t = - match t.node with - | Node(n) -> - let vd = Ptset.Int.to_int_vector td - in - { t with node = norm(tree_select_desc_only t.doc n vd) } - | _ -> { t with node = Nil } - -*) -let tagged_desc tag t = - match t.node with - | Node(n) -> - { t with node = norm(tree_tagged_desc t.doc n tag) } - | _ -> { t with node = Nil } - - -let tagged_foll_ctx tag t s = - match s.node with - | Node (below) -> - begin - match t.node with - | Node(n) -> - { t with node= norm (tree_tagged_foll_below t.doc n tag below) } - | Text(i,n) when equal_node nil n -> - let p = tree_prev_doc t.doc i in - { t with node= norm (tree_tagged_foll_below t.doc p tag below) } - | Text(_,n) -> - if (tree_tag_id t.doc n) == tag - then { t with node=Node(n) } - else - let node = - let dsc = tree_tagged_desc t.doc n tag in - if tree_is_nil dsc - then tree_tagged_foll_below t.doc n tag below - else dsc - in - { t with node = norm node } - | _ -> { t with node = Nil } - end - | _ -> {t with node=Nil } +let first_child n = { n with node = tree_first_child n.doc n.node } +let tagged_child tag n = { n with node = tree_tagged_child n.doc n.node tag } +let select_child ts n = { n with node = tree_select_child n.doc n.node (ptset_to_vector ts) } +let next_sibling n = { n with node = tree_next_sibling n.doc n.node } +let tagged_sibling tag n = { n with node = tree_tagged_sibling n.doc n.node tag } +let select_sibling ts n = { n with node = tree_select_foll_sibling n.doc n.node (ptset_to_vector ts) } +let next_sibling_ctx n _ = next_sibling n +let tagged_sibling_ctx tag n _ = tagged_sibling tag n +let select_sibling_ctx ts n _ = select_sibling ts n +let id t = tree_node_xml_id t.doc t.node + +let tag t = if t.node == nil then Tag.nullt else tree_tag_id t.doc t.node + +let tagged_desc tag n = { n with node = tree_tagged_desc n.doc n.node tag } +let select_desc ts n = { n with node = tree_select_desc n.doc n.node (ptset_to_vector ts) } + +let tagged_foll_ctx tag t ctx = + { t with node = tree_tagged_foll_below t.doc t.node tag ctx.node } +let select_foll_ctx ts n ctx = { n with node = tree_select_foll_below n.doc n.node (ptset_to_vector ts) ctx.node } let last_idx = ref 0 let array_find a i j = @@ -531,207 +329,68 @@ let array_find a i j = else loop !last_idx i j - -let text_below t = - let l = Array.length !contains_array in - match t.node with - | Node(n) -> - let i,j = tree_doc_ids t.doc n in - let id = if l == 0 then i else (array_find !contains_array i j) - in -(* Printf.printf "Looking for text below node %i with tag %s in range %i %i, in array : [|\n%!" - n (Tag.to_string (tree_tag_id t.doc n)) i j; - Array.iter (fun i -> Printf.printf "%i " (int_of_node i )) !contains_array; - Printf.printf "|]\nResult is %i\n%!" id; *) - if id == nil then - { t with node=Nil } - else - { t with node = Text(id, tree_next_sibling t.doc (tree_prev_doc t.doc id)) } - | _ -> (*Printf.printf "Here\n%!"; *) - { t with node = Nil } - -let text_next t root = - let l = Array.length !contains_array in - let inf = match t.node with - | Node(n) -> snd(tree_doc_ids t.doc n)+1 - | Text(i,_) -> i+1 - | _ -> assert false - in - match root.node with - | Node (n) -> - let _,j = tree_doc_ids t.doc n in - let id = if l == 0 then if inf > j then nil else inf - else array_find !contains_array inf j - in - if id == nil then { t with node= Nil } - else - { t with node = Text(id,tree_next_sibling t.doc (tree_prev_doc t.doc id)) } - | _ -> { t with node = Nil} - - -(* - let subtree_tags t tag = - match t with - { doc = d; node = Node(NC n) } -> - subtree_tags d n tag - | _ -> 0 - - let select_desc_array = ref [| |] - let idx = ref 0 - - let init_tagged_next t tagid = - let l = subtree_tags (root t) tagid - in - tagged_desc_array := Array.create l { t with node= Nil }; - let i = ref 0 in - let rec collect t = - if is_node t then begin - if tag t == tagid then - begin - !tagged_desc_array.(!i) <- t; - incr i; - end; - collect (first_child t); - collect (next_sibling t) - end; - in - collect t; - idx := 0 - - let print_id ppf v = - let pr x= Format.fprintf ppf x in - match v with - { node=Nil } -> pr "NULLT: -1" - | { node=String(i) } | { node=Node(SC(i,_)) } -> pr "DocID: %i" (int_of_node i) - | { node=Node(NC(i)) } -> pr "Node: %i" (int_of_node i) - - - -(* let tagged_next t tag = - if !idx >= Array.length !tagged_desc_array - then {t with node=Nil} - else - let r = !tagged_desc_array.(!idx) - in - incr idx; r -*) - - - let has_tagged_foll t tag = is_node (tagged_foll t tag) - let has_tagged_desc t tag = is_node (tagged_desc t tag) - - let contains t s = - Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.doc s) - - - let contains_old t s = - let regexp = Str.regexp_string s in - let matching arg = - try - let _ = Str.search_forward regexp arg 0; - in true - with _ -> false - in - let rec find t acc = match t.node with - | Nil -> acc - | String i -> - if matching (string t) then DocIdSet.add i acc else acc - | Node(_) -> (find (left t )) ((find (right t)) acc) - in - find t DocIdSet.empty - - - let contains_iter t s = - let regexp = Str.regexp_string s in - let matching arg = - try - let _ = Str.search_forward regexp arg 0; - in true - with _ -> false - in - let size = Text.size t.doc in - let rec find acc n = - if n == size then acc - else - find - (if matching (Text.get_cached_text t.doc (Obj.magic n)) then - DocIdSet.add (Obj.magic n) acc - else acc) (n+1) - in - find DocIdSet.empty 0 - - - - - let count_contains t s = Text.count_contains t.doc s -*) let count t s = text_count t.doc s -(* - let is_left t = - if is_root t then false - else - if tag (parent t) == Tag.pcdata then false - else - let u = left (parent t) in - (id t) == (id u) -*) + let print_xml_fast outc t = let rec loop ?(print_right=true) t = - match t.node with - | Nil -> () - | Text(i,n) -> output_string outc (get_cached_text t.doc i); + if t.node != nil + then + let tagid = tree_tag_id t.doc t.node in + if tagid==Tag.pcdata + then output_string outc (text_get_cached_text t.doc t.node); if print_right - then loop (right t) - | Node (n) -> - let tg = Tag.to_string (tag t) in - let l = left t - and r = right t - in - output_char outc '<'; - output_string outc tg; - ( match l.node with - Nil -> output_string outc "/>" - | Node(_) when Tag.equal (tag l) Tag.attribute -> - (loop_attributes (left l); - match (right l).node with - | Nil -> output_string outc "/>" - | _ -> - output_char outc '>'; - loop (right l); - output_string outc "' ) - | _ -> + then loop (next_sibling t) + + else + let tagstr = Tag.to_string tagid in + let l = first_child t + and r = next_sibling t + in + output_char outc '<'; + output_string outc tagstr; + if l.node == nil then output_string outc "/>" + else + if (tag l) == Tag.attribute then + begin + loop_attributes (first_child l); + if (next_sibling l).node == nil then output_string outc "/>" + else + begin + output_char outc '>'; + loop (next_sibling l); + output_string outc "'; + end; + end + else + begin output_char outc '>'; loop l; output_string outc "' - );if print_right then loop r - and loop_attributes a = - match a.node with - | Node(_) -> - let value = - match (left a).node with - | Text(i,_) -> (get_cached_text a.doc i) - | _ -> assert false - in - output_char outc ' '; - output_string outc (Tag.to_string (tag a)); - output_string outc "=\""; - output_string outc value; - output_char outc '"'; - loop_attributes (right a) - | _ -> () + output_string outc tagstr; + output_char outc '>'; + end; + if print_right then loop r + and loop_attributes a = + let s = (Tag.to_string (tag a)) in + let attname = String.sub s 3 ((String.length s) -3) in + output_char outc ' '; + output_string outc attname; + output_string outc "=\""; + output_string outc (text_get_cached_text t.doc + (tree_my_text a.doc (first_child a).node)); + output_char outc '"'; + loop_attributes (next_sibling a) in loop ~print_right:false t let print_xml_fast outc t = - if Tag.to_string (tag t) = "" then + if (tag t) = Tag.document_node then print_xml_fast outc (first_child t) - else print_xml_fast outc t + else print_xml_fast outc t let tags_below t tag = @@ -742,99 +401,46 @@ let tags_after t tag = let tags t tag = Hashtbl.find t.ttable tag -let tagged_lowest t tag = - let rec loop_lowest i = - let j = tree_tagged_desc t.doc i tag in - if tree_is_nil j then i else loop_lowest j - in - match t.node with - | Node i -> - let j = loop_lowest i in - { t with - node = norm( - if tree_is_nil j then - if (tree_tag_id t.doc i) == tag - then i - else j - else j) } - | Nil -> t - | _ -> assert false - - -let tagged_next t tag = - match t.node with - | Node(i) -> - let n = tree_tagged_foll_below t.doc i tag (Obj.magic 0) - in - if tree_is_nil n then mk_nil t - else - tagged_lowest { t with node = Node n } tag - | Nil -> t - | _ -> assert false let rec binary_parent t = - let res = - match t.node with - | Node(0) -> { t with node = Nil } - | Node(i) -> - let j = tree_prev_sibling t.doc i in - if tree_is_nil j then - let idoc = tree_prev_text t.doc i in - if equal_node nil idoc then - { t with node = Node (tree_parent t.doc i) } - else - { t with node = Text(idoc,i) } - else - let idoc = tree_prev_text t.doc i in - if equal_node nil idoc then - { t with node = Node (j) } - else { t with node = Text(idoc,i) } - | Text(d,i) -> - if tree_is_nil i then - let n = tree_parent_doc t.doc d in - let lc = tree_last_child t.doc n in - if tree_is_nil lc then {t with node = Node n } - else { t with node = Node lc } - else - let j = tree_prev_sibling t.doc i in - if tree_is_nil j then - { t with node = Node (tree_parent t.doc i) } - else { t with node = Node j } - | Nil -> t - in match res.node with - | Text(idoc,t) -> - if (Array.length !contains_array) != 0 - then if in_array !contains_array idoc then res - else binary_parent res - else res - | _ -> res - -let benchmark_text t = - let doc = t.doc in - match (root t).node with - | Node i -> let _,size = tree_doc_ids doc i in - Printf.eprintf "%i will take ~ %i seconds\n%!" - size (size/10000) ; - let a = Array.create size "" in - for i = 0 to size - do - a.(i) <- text_get_tc_text t.doc (i+1) - done; a - | _ -> assert false + if tree_is_first_child t.doc t.node + then { t with node = tree_parent t.doc t.node } + else { t with node = tree_prev_sibling t.doc t.node } let doc_ids (t:t) : (int*int) = - (Obj.magic ( - match t.node with - | Node i -> tree_doc_ids t.doc i - | Text (i,_) -> (i,i) - | Nil -> (nil,nil) - )) - -let subtree_tags t tag = match t.node with - | Nil -> 0 - | Node(i) -> tree_subtree_tags t.doc i tag - | Text(_,i) -> tree_subtree_tags t.doc i tag - -let get_text t = match t.node with - | Text(i,_) -> get_cached_text t.doc i - | _ -> "" + (Obj.magic (tree_doc_ids t.doc t.node)) + +let subtree_tags t tag = + if t.node == nil then 0 else + tree_subtree_tags t.doc t.node tag + +let get_text t = + let tid = tree_my_text t.doc t.node in + if tid == nil then "" else + let a, b = tree_doc_ids t.doc (tree_root t.doc) in + let _ = Printf.eprintf "Trying to take text %i of node %i in %i %i\n%!" tid t.node a b in + text_get_cached_text t.doc tid + + +let dump_tree fmt t = + let rec loop tree n = + if tree != nil then + let tag = (tree_tag_id t.doc tree ) in + let tagstr = Tag.to_string tag in + let tab = String.make n ' ' in + + if tag == Tag.pcdata || tag == Tag.attribute_data + then + Format.fprintf fmt "%s<%s>%s\n" + tab tagstr (text_get_cached_text t.doc (tree_my_text t.doc tree)) tagstr + else begin + Format.fprintf fmt "%s<%s>\n" tab tagstr; + loop (tree_first_child t.doc tree) (n+2); + Format.fprintf fmt "%s\n%!" tab tagstr; + end; + loop (tree_next_sibling t.doc tree) n + in + loop (tree_root t.doc) 0 +;; + + diff --git a/tree.mli b/tree.mli index d8a49c9..3f72894 100644 --- a/tree.mli +++ b/tree.mli @@ -16,39 +16,44 @@ val root : t -> t val is_root : t -> bool val parent : t -> t val first_child : t -> t +val tagged_child : Tag.t -> t -> t +val select_child : Ptset.Int.t -> t -> t + val next_sibling : t -> t + +val tagged_sibling : Tag.t -> t -> t +val tagged_sibling_ctx : Tag.t -> t -> t -> t + +val select_sibling : Ptset.Int.t -> t -> t +val select_sibling_ctx : Ptset.Int.t -> t -> t -> t + val next_sibling_ctx : t -> t -> t -val left : t -> t -val right : t -> t -val id : t -> int + val tag : t -> Tag.t -val text_below : t -> t -val text_next : t -> t -> t +val id : t -> int + val tagged_desc : Tag.t -> t -> t +val select_desc : Ptset.Int.t -> t -> t + val tagged_foll_ctx : Tag.t -> t -> t -> t -(* -val select_desc_only : Ptset.Int.t -> t -> t -val select_foll_only : Ptset.Int.t -> t -> t -> t -val select_below : Ptset.Int.t -> Ptset.Int.t -> t -> t -val select_next : Ptset.Int.t -> Ptset.Int.t -> t -> t -> t -*) +val select_foll_ctx : Ptset.Int.t -> t -> t -> t + val count : t -> string -> int val print_xml_fast : out_channel -> t -> unit -val node_child : t -> t -val node_sibling : t -> t -val node_sibling_ctx : t -> t -> t + val tags_below : t -> Tag.t -> Ptset.Int.t val tags_after : t -> Tag.t -> Ptset.Int.t val tags : t -> Tag.t -> Ptset.Int.t*Ptset.Int.t val is_below_right : t -> t -> bool val is_left : t -> bool -val tagged_lowest : t -> Tag.t -> t -val tagged_next : t -> Tag.t -> t + val binary_parent : t -> t -val benchmark_text : t -> string array + val count_contains : t -> string -> int val unsorted_contains : t -> string -> unit val text_size : t -> int val doc_ids : t -> int*int val subtree_tags : t -> Tag.t -> int val get_text : t -> string + +val dump_tree : Format.formatter -> t -> unit -- 2.17.1