From 9be0c0e2a5597148fdc3a3cca2bdaf69da0aa27d Mon Sep 17 00:00:00 2001 From: kim Date: Thu, 27 Nov 2008 23:39:49 +0000 Subject: [PATCH] . git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@21 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 7 ++++- SXSIStorageInterface.cpp | 13 ++++++-- XMLDocShredder.cpp | 21 +++++-------- main.ml | 3 +- tests/test.xml | 12 +------- tree.ml | 66 ++++++++++++++++++++++++++++++++-------- tree.mli | 2 ++ 7 files changed, 82 insertions(+), 42 deletions(-) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index b0e57d9..1898d5a 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -85,9 +85,14 @@ extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){ extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){ CAMLparam2(tree,id); - CAMLlocal1(res); CAMLreturn(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id)))); } + +extern "C" CAMLprim value caml_xml_tree_is_leaf(value tree, value id){ + CAMLparam2(tree,id); + CAMLreturn(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id)))); +} + extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ CAMLparam2(tree,id); CAMLreturn(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); diff --git a/SXSIStorageInterface.cpp b/SXSIStorageInterface.cpp index bd21cd0..14c1d70 100644 --- a/SXSIStorageInterface.cpp +++ b/SXSIStorageInterface.cpp @@ -24,22 +24,29 @@ SXSIStorageInterface::~SXSIStorageInterface() void SXSIStorageInterface::newChild(string name) { + cerr << "New child " << name << "\n"; tree->NewOpenTag((unsigned char*) name.c_str()); } void SXSIStorageInterface::newText(string text) { - if (text.empty()) + if (text.empty()){ + cerr << "Calling newEmptyText()\n"; tree->NewEmptyText(); - else + } + else { + cerr << "Calling newText(" << text <<")\n"; tree->NewText((unsigned char*) text.c_str()); - } + + }; +} void SXSIStorageInterface::nodeFinished(string name) { + cerr << "Node Finished child " << name << "\n"; tree->NewClosingTag((unsigned char*) name.c_str()); } diff --git a/XMLDocShredder.cpp b/XMLDocShredder.cpp index 722c291..3daaf45 100644 --- a/XMLDocShredder.cpp +++ b/XMLDocShredder.cpp @@ -106,8 +106,7 @@ void XMLDocShredder::processStartElement() if (empty){ storageIfc_->newText(""); //myText - storageIfc_->nodeFinished(name); - storageIfc_->newText(""); //nextText + storageIfc_->nodeFinished(name); }; @@ -131,7 +130,6 @@ void XMLDocShredder::processPCDATA() if (reader_->has_value()) { - storageIfc_->newChild("<$>"); storageIfc_->newText(reader_->get_value()); last_text = true; } @@ -167,24 +165,22 @@ void XMLDocShredder::processAttributes() else { - storageIfc_->newChild(name); - storageIfc_->newChild("<$>"); - storageIfc_->newText(value); - storageIfc_->nodeFinished("<$>"); + storageIfc_->newText(""); //prevText + storageIfc_->newChild(name); + storageIfc_->newText(value); + storageIfc_->nodeFinished(name); } } while (reader_->move_to_next_attribute()); + storageIfc_->newText(""); //nextText storageIfc_->nodeFinished("<@>"); } void XMLDocShredder::processSignificantWhitespace() { - ustring value = reader_->get_value(); - + ustring value = reader_->get_value(); // each significant whitespace sequence constructs a text node - storageIfc_->newChild("<$>"); storageIfc_->newText(value); - } @@ -199,6 +195,7 @@ void XMLDocShredder::processEndDocument() { /* tell the storage interface that document parsing has finished, and structures * can now be written to disk. */ + storageIfc_->newText(""); storageIfc_->nodeFinished(""); storageIfc_->parsingFinished(); } @@ -239,10 +236,8 @@ void XMLDocShredder::processCDATASection() */ ustring value = reader_->get_value(); - storageIfc_->newChild("<$>"); storageIfc_->newText(value); last_text = true; - // storageIfc_->nodeFinished(); } diff --git a/main.ml b/main.ml index 10fd915..88363e0 100644 --- a/main.ml +++ b/main.ml @@ -24,7 +24,8 @@ let total_time () = List.fold_left (+.) 0. !l;; let test filename query output = Printf.eprintf "Parsing document : %!"; let v = time Tree.Binary.parse_xml_uri filename in - Tree.Binary.print_xml_fast stderr v + Tree.dump v; + Tree.Binary.print_xml_fast stderr v let main filename query output = diff --git a/tests/test.xml b/tests/test.xml index d83dfc0..87340f4 100644 --- a/tests/test.xml +++ b/tests/test.xml @@ -1,11 +1 @@ - - - - - - - - - - - +YYYYYYYYYYYYYAAAAAAAAAXXXXXXXXXXXX diff --git a/tree.ml b/tree.ml index 53872b3..219ee4b 100644 --- a/tree.ml +++ b/tree.ml @@ -240,10 +240,17 @@ struct (* Todo *) external nullt : unit -> [`Text ] node = "caml_xml_tree_nullt" let nil = nullt () - external get_text : t -> [`Text] node -> string = "caml_text_collection_get_text" - let is_empty _ (n : [`Text] node) = equal nil n + external get_text1 : t -> [`Text] node -> string = "caml_text_collection_get_text" + + let get_text t n = Printf.printf "@@@@@@%i\n%!" (Obj.magic n); + if equal nil n then "" + else get_text1 t n + + let is_empty t (n : [`Text] node) = (get_text t n) = "" + + end + - end module Tree = struct @@ -271,17 +278,42 @@ struct let is_last t n = equal nil (next_sibling t n) external prev_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text" + let prev_text t id = Printf.eprintf "Calling PrevText for node %i with result" (Obj.magic id); + let did = if is_nil id then Text.nil else prev_text t id + in Printf.eprintf " %i!!!\n%!" (Obj.magic did); did + + + external my_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" external next_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text" - + + let next_text t id = Printf.eprintf "Calling NextText for node %i with result" (Obj.magic id); + let did = if is_nil id then Text.nil else next_text t id + in Printf.eprintf " %i!!!\n%!" (Obj.magic did); did + external text_xml_id : t -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" external node_xml_id : t -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" - + + + let print_skel t = + let rec aux id = + if (is_nil id) + then Printf.eprintf "#" + else + begin + Printf.eprintf "%s(" (Tag.to_string (tag t id)); + aux(first_child t id); + Printf.eprintf ",\n"; + aux(next_sibling t id); + Printf.eprintf ")\n"; + end + in + aux (root t) end - module Binary : BINARY = struct - + module Binary = struct + type node_content = [ `Node of [`Tree ] node | `String of [`Text ] node * [`Tree ] node ] @@ -296,7 +328,7 @@ struct type t = { doc : doc; node : descr } - + let dump { doc=t } = Tree.print_skel t open Tree let node_of_t t = { doc= t; node= Node(`Node (root t)) } @@ -319,7 +351,7 @@ struct let equal a b = (compare a b) == 0 let string t = match t.node with - | String i -> Text.get_text (text_collection t.doc) i + | String i -> Text.get_text (text_collection t.doc) i | _ -> assert false let norm (n : [`Tree ] node ) = if is_nil n then Nil else Node (`Node n) @@ -327,6 +359,7 @@ struct let descr t = t.node let first_child n = + Printf.eprintf "first_child!\n%!"; let node' = match n.node with | Nil | String _ -> failwith "first_child" @@ -342,6 +375,7 @@ struct { n with node = node'} let next_sibling n = + Printf.eprintf "next_sibling!\n%!"; let node' = match n.node with | Nil | String _ -> failwith "next_sibling" @@ -385,6 +419,7 @@ struct output_string outc tg; ( match l.node with Nil -> output_string outc "/>" + | String _ -> assert false | Node(_) when Tag.equal (tag l) Tag.attribute -> (loop_attributes (left l); match (right l).node with @@ -397,8 +432,8 @@ struct output_char outc '>' ) | _ -> output_char outc '>'; - loop (left l); - output_string outc "' );if print_right then loop r @@ -413,8 +448,13 @@ struct | _ -> () in loop ~print_right:false t - + + + end - + end + + +let dump = XML.Binary.dump include XML diff --git a/tree.mli b/tree.mli index 6bc38b5..060e086 100644 --- a/tree.mli +++ b/tree.mli @@ -24,3 +24,5 @@ sig end module Binary : BINARY + +val dump : Binary.t -> unit -- 2.17.1