PROFILE=true
VERBOSE=false
-MLSRCS = memory.ml tag.ml tagSet.ml tree.ml automaton.ml ulexer.ml xPath.ml main.ml
-MLISRCS = memory.mli automaton.mli tag.mli tagSet.mli tree.mli ulexer.mli xPath.mli
+MLSRCS = memory.ml tag.ml tagSet.ml options.ml tree.ml automaton.ml ulexer.ml xPath.ml main.ml
+MLISRCS = memory.mli options.mli automaton.mli tag.mli tagSet.mli tree.mli ulexer.mli xPath.mli
MLOBJS = $(MLSRCS:.ml=.cmx)
MLCINT = $(MLISRCS:.mli=.cmi)
#include <caml/memory.h>
#include <caml/callback.h>
#include <caml/fail.h>
+#include <caml/custom.h>
+
+
} //extern C
//#include "TextCollection/TextCollection.h"
#define CAMLRAISECPP(e) (caml_failwith( ((e).what())))
#define NOT_IMPLEMENTED(s) (caml_failwith(s))
-#define XMLTREE(x) ((XMLTree *)(x))
+#define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x)))
#define TEXTCOLLECTION(x)
#define TREENODEVAL(i) ((treeNode) (Int_val(i)))
-extern "C" CAMLprim value caml_call_shredder_uri(value uri){
+extern "C" {
+ static struct custom_operations ops; // operations table passed to caml_alloc_custom for XMLTree custom blocks
+ static bool initialized = false; // guard flag tested at the top of caml_init_ops
+}
+// Finalizer installed in the custom-operations table: frees the C++ XMLTree
+// whose pointer is stored in the payload of the custom block `tree`.
+extern "C" void caml_xml_tree_finalize(value tree){
+  XMLTree *native = XMLTREE(tree);
+  delete native;
+}
+
+// Populates the custom_operations table used for XMLTree custom blocks.
+// Safe to call repeatedly: only the first call writes to the table.
+extern "C" void caml_init_ops () {
+  if (initialized)
+    return;
+  ops.identifier = (char*) "XMLTree";
+  ops.finalize = caml_xml_tree_finalize;
+  // Tree handles have no comparison, hashing or marshalling support.
+  ops.compare = custom_compare_default;
+  ops.hash = custom_hash_default;
+  ops.serialize = custom_serialize_default;
+  ops.deserialize = custom_deserialize_default;
+  // Record completion; without this the guard above never short-circuits.
+  initialized = true;
+}
+
+extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){
CAMLparam1(uri);
CAMLlocal1(doc);
char *fn = String_val(uri);
try {
- XMLDocShredder shredder(fn);
+ XMLDocShredder shredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc));
+ XMLTree * tree;
shredder.processStartDocument(fn);
shredder.parse();
shredder.processEndDocument();
- doc = (value) shredder.storageIfc_->returnDocument();
-
+ caml_init_ops();
+ doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
+ tree = (XMLTree *) shredder.storageIfc_->returnDocument();
+ memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
CAMLreturn(doc);
}
catch (const std::exception& e){
}
-extern "C" CAMLprim value caml_call_shredder_string(value data){
+extern "C" CAMLprim value caml_call_shredder_string(value data,value sf, value iet, value dtc){
CAMLparam1(data);
CAMLlocal1(doc);
unsigned int ln = string_length(data);
unsigned char *fn = (unsigned char*) String_val(data);
try {
- XMLDocShredder shredder(fn,ln);
+ XMLDocShredder shredder(fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc));
+ XMLTree* tree;
shredder.processStartDocument("");
shredder.parse();
shredder.processEndDocument();
- doc = (value) shredder.storageIfc_->returnDocument();
-
+ caml_init_ops();
+ doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
+ tree = (XMLTree *) shredder.storageIfc_->returnDocument();
+ memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
CAMLreturn(doc);
}
catch (const std::exception& e) {
CAMLreturn (caml_copy_string(tag));
}
+
extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){
CAMLparam2(tree,tagid);
const char* tag;
#include "Utils.h"
-SXSIStorageInterface::SXSIStorageInterface()
+SXSIStorageInterface::SXSIStorageInterface(int sf,bool iet,bool dtc) // allocates a new XMLTree and opens it with the three options
{
tree = new XMLTree();
- tree->OpenDocument(false,64);
+ tree->OpenDocument(iet,sf,dtc); // argument order differs from the constructor's: iet, then sf, then dtc
}
SXSIStorageInterface::~SXSIStorageInterface()
tree->NewText((unsigned char*) text.c_str());
}
}
-
void SXSIStorageInterface::nodeFinished(string name)
class SXSIStorageInterface: public StorageInterface
{
public:
- SXSIStorageInterface();
+ SXSIStorageInterface(int sf, bool iet, bool dtc);
virtual ~SXSIStorageInterface();
virtual void newChild(string name);
virtual void newText(string text);
}
XMLDocShredder::XMLDocShredder(const unsigned char * data,
- TextReader::size_type size)
+ TextReader::size_type size,
+ int sf,
+ bool iet,
+ bool dtc) // in-memory variant: reads `size` bytes from `data`; sf/iet/dtc are only forwarded to the storage interface
{
last_text = false;
reader_ = new TextReader(data,size,"");
setProperties();
- storageIfc_ = new SXSIStorageInterface();
+ storageIfc_ = new SXSIStorageInterface(sf,iet,dtc); // storage backend receives the three options unchanged
buffer = "";
}
-XMLDocShredder::XMLDocShredder(const string inFileName)
+XMLDocShredder::XMLDocShredder(const string inFileName,int sf, bool iet, bool dtc) // file variant: reads from inFileName; sf/iet/dtc are only forwarded to the storage interface
{
last_text = false;
reader_ = new TextReader(inFileName);
setProperties();
- storageIfc_ = new SXSIStorageInterface();
+ storageIfc_ = new SXSIStorageInterface(sf,iet,dtc); // storage backend receives the three options unchanged
buffer = "";
}
class XMLDocShredder
{
public:
- XMLDocShredder(const string inFileName);
- XMLDocShredder(const unsigned char * data, TextReader::size_type size);
- virtual ~XMLDocShredder();
- virtual void processStartElement();
- virtual void processEndElement();
- virtual void processPCDATA();
- virtual void processAttributes();
- virtual void processSignificantWhitespace();
- virtual void processStartDocument(const string docName);
- virtual void processEndDocument();
- virtual void processComment();
- virtual void processProcessingInstruction();
- virtual void processDocTypeDeclaration();
- virtual void processUnknownNodeType();
- virtual void processCDATASection();
- virtual void parse();
-
+ XMLDocShredder(const string inFileName,int sf, bool iet, bool dtc);
+ XMLDocShredder(const unsigned char * data, TextReader::size_type size,int sf, bool iet, bool dtc);
+ virtual ~XMLDocShredder();
+ virtual void processStartElement();
+ virtual void processEndElement();
+ virtual void processPCDATA();
+ virtual void processAttributes();
+ virtual void processSignificantWhitespace();
+ virtual void processStartDocument(const string docName);
+ virtual void processEndDocument();
+ virtual void processComment();
+ virtual void processProcessingInstruction();
+ virtual void processDocTypeDeclaration();
+ virtual void processUnknownNodeType();
+ virtual void processCDATASection();
+ virtual void parse();
+
StorageInterface *storageIfc_;
struct
let path = "."
let result_basename = "test"
- let num_runs = 5
- let run_with_output = true
+ let num_runs = 1
+ let run_with_output = false
let run_without_output = true
end
module I = INIT_TESTER (CONF)
-module Test = MK (SXSI) (MK (SaxonBXQuery) (I))
-
+module TestOld = MK (SXSI) (MK (SaxonBXQuery) (I))
+module Test = MK (SXSI) (I)
let l = Test.test_engine [] (make_queryset
["/home/kim/Documents/Work/Code/xpathcomp/tests/tiny.xml"]
- ["/descendant::*/descendant::*/descendant::*"])
+ ["/child::*"])
;;
tag.cmx: tag.cmi
tagSet.cmo: tag.cmi tagSet.cmi
tagSet.cmx: tag.cmx tagSet.cmi
-tree.cmo: tag.cmi tree.cmi
-tree.cmx: tag.cmx tree.cmi
+options.cmo: options.cmi
+options.cmx: options.cmi
+tree.cmo: tag.cmi options.cmi tree.cmi
+tree.cmx: tag.cmx options.cmx tree.cmi
automaton.cmo: tree.cmi tagSet.cmi tag.cmi automaton.cmi
automaton.cmx: tree.cmx tagSet.cmx tag.cmx automaton.cmi
ulexer.cmo: ulexer.cmi
ulexer.cmx: ulexer.cmi
xPath.cmo: ulexer.cmi tree.cmi tagSet.cmi tag.cmi automaton.cmi xPath.cmi
xPath.cmx: ulexer.cmx tree.cmx tagSet.cmx tag.cmx automaton.cmx xPath.cmi
-main.cmo: xPath.cmi ulexer.cmi tree.cmi tag.cmi automaton.cmi
-main.cmx: xPath.cmx ulexer.cmx tree.cmx tag.cmx automaton.cmx
+main.cmo: xPath.cmi ulexer.cmi tree.cmi tag.cmi options.cmi automaton.cmi
+main.cmx: xPath.cmx ulexer.cmx tree.cmx tag.cmx options.cmx automaton.cmx
+memory.cmi:
+options.cmi:
automaton.cmi: tree.cmi tagSet.cmi
+tag.cmi:
tagSet.cmi: tag.cmi
tree.cmi: tag.cmi
+ulexer.cmi:
xPath.cmi: tagSet.cmi automaton.cmi
Printf.eprintf "Total time : %fms\n Coherence : %i\n%!" (total_time())
;;
-let argc = Array.length Sys.argv;;
-if (argc < 3 || argc >4)
-then
- (prerr_endline ("usage : " ^ Sys.argv.(0) ^ " <document> \'query\'[ <output> ]");
- exit 1)
-;;
+Options.parse_cmdline();;
-main Sys.argv.(1) Sys.argv.(2) (if argc == 4 then Some Sys.argv.(3) else None) ;;
+main !Options.input_file !Options.query !Options.output_file;;
Printf.eprintf "\n=================================================\nDEBUGGING\n%!";
Tree.DEBUGTREE.print_stats Format.err_formatter;;
-
-
+Gc.full_major() (* final full major collection; presumably here so custom-block finalizers run before exit -- TODO confirm intent *)
--- /dev/null
+let index_empty_texts = ref false (* turned on by the -i switch *)
+let sample_factor = ref 64 (* overridden by -f <int> *)
+let disable_text_collection = ref false (* turned on by the -d switch *)
+
+let query = ref "" (* second positional argument *)
+let input_file = ref "" (* first positional argument *)
+let output_file = ref None (* optional third positional argument *)
+
+
+let usage_msg = Printf.sprintf "%s <input.xml> 'query' [output]" Sys.argv.(0) (* usage banner handed to Arg.parse *)
+
+(* Handler for positional arguments: 1st -> input_file, 2nd -> query, 3rd -> output_file. *)
+(* Any further positional argument is rejected with Arg.Bad. *)
+let anon_fun =
+  let seen = ref 0 in
+  fun arg ->
+    (match !seen with
+     | 0 -> input_file := arg
+     | 1 -> query := arg
+     | 2 -> output_file := Some arg
+     | _ -> raise (Arg.Bad arg));
+    incr seen
+
+(* Command-line switches; each one writes into a reference of this module. *)
+let spec =
+  [ ("-f", Arg.Set_int sample_factor, "sample factor [default=64]");
+    ("-i", Arg.Set index_empty_texts, "index empty texts [default=false]");
+    ("-d", Arg.Set disable_text_collection,
+     "Disable text collection[default=false]") ]
+
+(* Parses Sys.argv with Arg.parse, filling the option references of this module. *)
+(* If the document or the query is missing, prints the usage text on stderr and exits with status 1. *)
+let parse_cmdline () =
+  Arg.parse spec anon_fun usage_msg;
+  (* Both leading positional arguments are mandatory; an empty value means one was not supplied. *)
+  if !input_file = "" || !query = "" then begin
+    Arg.usage spec usage_msg;
+    exit 1
+  end
+
+
--- /dev/null
+val parse_cmdline : unit -> unit (** Runs [Arg.parse] over the command line and fills the references declared here. *)
+val index_empty_texts : bool ref (** Set by [-i]; initially [false]. *)
+val sample_factor : int ref (** Set by [-f]; initially [64]. *)
+val disable_text_collection : bool ref (** Set by [-d]; initially [false]. *)
+val query : string ref (** Second positional argument; initially [""]. *)
+val input_file : string ref (** First positional argument; initially [""]. *)
+val output_file : string option ref (** Third positional argument if present; initially [None]. *)
+
+
external tag_name : pool -> t -> string = "caml_xml_tree_tag_name"
let nullt = null_tag ()
-let pcdata = max_int
-let attribute = max_int - 1
+(* Defined in XMLTree.cpp *)
+let pcdata = 1
+let attribute = 0
-let pool = ref (null_pool ())
+let pool = Weak.create 1 (* one-slot weak array for the current tag pool; a weak slot does not by itself keep its content alive *)
-let init p = pool := p
+let init p = Weak.set pool 0 (Some p) (* installs p as the current pool in slot 0 *)
+
+(* Returns the pool stored in slot 0 of the weak array; fails if the slot is empty. *)
+let get_pool () =
+  match Weak.get pool 0 with
+  | None -> failwith "Tag.ml: Uninitialized Document"
+  | Some live_pool -> live_pool
let tag s = match s with
| "<$>" -> pcdata
| "<@>" -> attribute
- | _ -> register_tag !pool s
+ | _ -> register_tag (get_pool()) s
let compare = (-)
let equal = (==)
let to_string t =
if t = pcdata then "<$>"
else if t = attribute then "<@>"
- else tag_name !pool t
+ else tag_name (get_pool()) t
let print ppf t = Format.fprintf ppf "%s" (to_string t)
external int_of_node : 'a node -> int = "%identity"
- external parse_xml_uri : string -> t = "caml_call_shredder_uri"
- let parse_xml_uri uri = parse_xml_uri uri
-
- external parse_xml_string : string -> t = "caml_call_shredder_string"
- let parse_xml_string uri = parse_xml_string uri
+ external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri" (* C stub; arguments: uri, sample factor, index-empty-texts flag, disable-text-collection flag *)
+
+ external parse_xml_string : string -> int -> bool -> bool -> t = "caml_call_shredder_string" (* C stub; arguments: document text, sample factor, index-empty-texts flag, disable-text-collection flag *)
+
module Text =
struct
node = Node(NC (root t)) }
- let parse_xml_uri str = node_of_t (parse_xml_uri str)
- let parse_xml_string str = node_of_t (parse_xml_string str)
+ (* Parses the document at [str] with the values currently held in Options, then wraps it with node_of_t. *)
+ let parse_xml_uri str =
+   let sf = !Options.sample_factor
+   and iet = !Options.index_empty_texts
+   and dtc = !Options.disable_text_collection in
+   node_of_t (parse_xml_uri str sf iet dtc)
+
+ (* Parses the XML text [str] with the values currently held in Options, then wraps it with node_of_t. *)
+ let parse_xml_string str =
+   let sf = !Options.sample_factor
+   and iet = !Options.index_empty_texts
+   and dtc = !Options.disable_text_collection in
+   node_of_t (parse_xml_string str sf iet dtc)
external pool : doc -> Tag.pool = "%identity"
aux (first_child n);
aux (next_sibling n)
in aux t
+
+ let print_stats _ = () (* stub: ignores its argument and does nothing *)
end
end