From: kim Date: Tue, 1 Sep 2009 22:32:20 +0000 (+0000) Subject: . X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=8e1b9299e0c8e1731db61955ef756fa92ed8c615;p=SXSI%2Fxpathcomp.git . git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@561 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Makefile b/Makefile index 0fdc017..b033ed1 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,9 @@ PPINCLUDES=$(OCAMLINCLUDES:%=-ppopt %) CXXSOURCES = results.c XMLDocShredder.cpp OCamlDriver.cpp CXXOBJECTS1 = $(CXXSOURCES:.cpp=.o) CXXOBJECTS = $(CXXOBJECTS1:.c=.o) -CXXINCLUDES = \ + +CAMLINCLUDES= -I`ocamlc -where` +LIBXMLINCLUDES= \ -I/usr/include/libxml++-2.6 \ -I/usr/include/libxml2 \ -I/usr/include/glibmm-2.4 \ @@ -30,13 +32,16 @@ CXXINCLUDES = \ -I/usr/lib/glibmm-2.4/include \ -I/usr/lib/sigc++-2.0/include \ -I/usr/lib/glib-2.0/include\ - -I`ocamlc -where`\ + +SXSIINCLUDES = \ -IXMLTree \ -IXMLTree/libcds/includes \ -IXMLTree/TextCollection -CXXFLAGS = -O3 -Wall $(INCLUDEDIRS) -fPIC -std=c++0x -CCFLAGS = -O3 -Wall -fPIC +CXXINCLUDES= $(CAMLINCLUDES) $(LIBXMLINCLUDES) $(SXSIINCLUDES) + +CXXFLAGS = -O3 -Wall $(INCLUDEDIRS) -fPIC -std=c++0x -static +CCFLAGS = -O3 -Wall -fPIC -static ifeq ($(VERBOSE),true) HIDE= @@ -138,6 +143,15 @@ timeXMLTree: $(CXXOBJECTS) XMLTree/XMLTree.a timeXMLTree.cpp myTimeXMLTree.cpp # $(LIBS) testXMLTree.cpp rm -rf .libs +timeSXSI: $(CXXOBJECTS) XMLTree/XMLTree.a timeSXSI.cpp + mkdir -p .libs/ + cd .libs/ && ar x ../XMLTree/XMLTree.a + $(CXX) -o timeSXSI $(CXXFLAGS) $(SXSIINCLUDES) -I/usr/include/libxml2 -lxml2 ./.libs/*.o timeSXSI.cpp +# $(CXX) -o testXMLTree $(CXXFLAGS) $(CXXINCLUDES) XMLDocShredder.o \ +# SXSIStorageInterface.o StorageInterface.o ./.libs/*.o \ +# $(LIBS) testXMLTree.cpp + rm -rf .libs + XMLDocShredder.o: XMLDocShredder.h XMLDocShredder.cpp OCamlDriver.o: XMLDocShredder.h results.o: results.h diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 8f84194..21e5398 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -790,24 +790,16 @@ int iterfcns(XMLTree* tree, treeNode node){ if (node == NULLT) return 0; else { - return /*1+ iterfcns(tree,tree->FirstChild(node)) +*/ - iterfcns(tree,tree->NextSibling(node)); + return 1+ iterfcns(tree,tree->NextSibling(node)) + iterfcns(tree,tree->FirstChild(node)); }; } -/* + extern "C" value caml_benchmark_fcns(value tree){ int i = iterfcns(XMLTREE(tree),0); return Val_unit; - + } -*/ -extern "C" value caml_benchmark_fcns(value tree){ - treeNode root = XMLTREE(tree)->FirstChild(0); - root = XMLTREE(tree)->FirstChild(root); - iterfcns(XMLTREE(tree),root); - return Val_unit; -} int iterlcps(XMLTree* tree, treeNode node){ if (node == NULLT) return 0; @@ -869,7 +861,7 @@ extern "C" { if (n == NULL) return 0; else { - return (1+ iter_tree(n->first) + iter_tree(n->next)); + return (1+ iter_tree(n->next)+ iter_tree(n->first) ); }; } diff --git a/main.ml b/main.ml index 26b445e..0650b6b 100644 --- a/main.ml +++ b/main.ml @@ -71,15 +71,14 @@ let main v query_string output = Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in let _ = Printf.eprintf "Number of nodes %i\n%!" (Tree.size v) in - let _ = Tree.stats v in +(* let _ = Tree.stats v in let _ = Printf.eprintf "Timing first_child/next_sibling %!" in let _ = time (Tree.benchmark_fcns) v in let _ = Printf.eprintf "Timing last_child/prev_sibling %!" in let _ = time (Tree.benchmark_lcps) v in let _ = Printf.eprintf "Timing jump to a %!" in - let _ = time (Tree.benchmark_jump v) (Tag.tag "a") in - -(* let _ = Printf.eprintf "Timing pointer allocation %!" in + let _ = time (Tree.benchmark_jump v) (Tag.tag "a") in + let _ = Printf.eprintf "Timing pointer allocation %!" in let pointers = time (build_pointers) v in let _ = Printf.eprintf "Timing pointer iteration %!" in let i = time (iter_pointers) pointers in diff --git a/timeSXSI.cpp b/timeSXSI.cpp new file mode 100644 index 0000000..e223a53 --- /dev/null +++ b/timeSXSI.cpp @@ -0,0 +1,555 @@ +#include "XMLTree.h" +#include "Utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "timings.h" + + +using std::cout; +using std::string; +using std::left; +using std::right; +using std::cerr; + + +#define NUM_PATHS 10000 +#define RANDOM(m) ((unsigned long int) (random() % (m))) + +#define OUTPUT_LINE() (std::cerr << "------------------------------------\n\n") +#define SRX_MAGIC_STRING "SXSI_INDEX\n" +#define SRX_VERSION "2\n" +#define READ_UCHAR(pchar,file) do { \ + if (fread((pchar),sizeof(char),1,(file)) == 0) { \ + std::cerr << "Error: Invalid .srx file\n"; \ + return NULL; \ + }; \ + } while(0) \ + + +extern "C" { + typedef void ** tree_node; + +#define SIZE(v) (((unsigned long) ((v)[0])) >> 1) +#define ISBINARY(v) (((unsigned long) ((v)[0])) & 1) +#define CHILDR(v,i) (((tree_node)(v))[(i)+1]) +#define CHILD(v,i) ((tree_node) (((tree_node)(v))[(i)+1])) + + static tree_node create_node(long size,int binary){ + tree_node node = (tree_node) malloc(sizeof(void*) * (size+1)); + if (node) { + binary = binary & 1; + node[0] = (void *) (size << 1 | binary); + node[1] = NULL; + node[2] = NULL; + }; + return node; + } + + tree_node create_naive(XMLTree *tree, treeNode node, int prealloc){ + if (node == NULLT) + return NULL; + else { + tree_node self; + if (prealloc) + self = create_node(2,1); + + tree_node first = create_naive(tree,tree->FirstChild(node),prealloc); + tree_node next = create_naive(tree,tree->NextSibling(node),prealloc); + if (!prealloc) + self = create_node(2,1); + self[0] = (void *) (tree->NumChildren(node) << 1 | 1); + self[1] = first; + self[2] = next; + + return self; + }; + } + + tree_node create_with_child_array(XMLTree *tree, treeNode node){ + if (tree==NULL) + return NULL; + else { + long size = (long) tree->NumChildren(node); + tree_node self = create_node(size,0); + treeNode child = tree->FirstChild(node); + for(int i = 0; i < size; i++){ + CHILDR(self,i) = create_with_child_array(tree,child); + child = tree->NextSibling(child); + }; + return self; + }; + + } + + static unsigned long my_malloc_idx; + + tree_node my_malloc_create_node(tree_node pool) { + tree_node ret; + ret = &pool[my_malloc_idx * 3]; + my_malloc_idx++; + return ret; + } + + tree_node create_perfect_rec(XMLTree *tree, treeNode node, tree_node pool ,int prealloc ) { + if (node == NULLT) + return NULL; + else { + tree_node self; + if (prealloc) + self = my_malloc_create_node(pool); + + tree_node first = create_naive(tree,tree->FirstChild(node),prealloc); + tree_node next = create_naive(tree,tree->NextSibling(node),prealloc); + if (!prealloc) + self = my_malloc_create_node(pool); + self[0] = (void *) (tree->NumChildren(node) << 1 | 1); + self[1] = first; + self[2] = next; + + return self; + }; + + } + + tree_node create_perfect(XMLTree *tree, int prealloc){ + tree_node v = (tree_node) malloc(sizeof(void*)*3 * tree->Size()); + my_malloc_idx = 0; + if (v) + create_perfect_rec(tree,0,v,prealloc); + + return v; + + } + + void unranked_preorder(tree_node node){ + if (node == NULL) + return; + for(unsigned long i = 0; i < SIZE(node); i++) + unranked_preorder(CHILD(node,i)); + }; + + void binary_preorder(tree_node node){ + if (node == NULL) + return; + + binary_preorder( (tree_node) (node[1])); + binary_preorder( (tree_node) (node[2])); + }; + + void nsfs_order(tree_node node){ + if (node == NULL) + return; + + binary_preorder( (tree_node) (node[2])); + binary_preorder( (tree_node) (node[1])); + }; + + + + void pre_order(tree_node node){ + if (node == NULL) + return; + if (ISBINARY(node)) + binary_preorder(node); + else + unranked_preorder(node); + return; + } + + + tree_node binary_nth_child (tree_node node, unsigned long n){ + if (n >= SIZE(node)) + return NULL; + + tree_node child = CHILD(node,0); + for(unsigned long i = 0; i < n; i ++) + child=CHILD(child,1); + return child; + + } + + + void binary_random_path(tree_node node){ + if (node == NULL || SIZE(node) == 0) + return; + binary_random_path(binary_nth_child(node,RANDOM(SIZE(node)))); + } + + unsigned long crc = 0; + + void unranked_random_path(tree_node node){ + if (node == NULL || SIZE(node) == 0) + return; + unsigned long n = RANDOM(SIZE(node)); + crc+=n; + unranked_random_path(CHILD(node,n)); + } + + void random_path(tree_node node){ + if (ISBINARY(node)) + binary_random_path(node); + else + unranked_random_path(node); + + } + + void delete_tree_node(tree_node node){ + if (node) { + if (ISBINARY(node)) { + delete_tree_node((tree_node) (node[1])); + delete_tree_node((tree_node) (node[2])); + } + else + for(unsigned long i = 0; i < SIZE(node); i++) + delete_tree_node(CHILD(node,i)); + + free(node); + }; + } + +} + +void xml_tree_preorder(XMLTree* tree, treeNode node){ + if (node == NULLT) + return; + + xml_tree_preorder(tree,tree->FirstChild(node)); + xml_tree_preorder(tree,tree->NextSibling(node)); + +} +void xml_tree_unranked_preorder(XMLTree* tree,treeNode node){ + if (node == NULLT|| tree->IsLeaf(node)) + return; + + for(int i = 1; i <= tree->NumChildren(node); i++) + xml_tree_unranked_preorder(tree,tree->Child(node,i)); + +} + +void xml_tree_nsfc(XMLTree* tree, treeNode node){ + if (node == NULLT) + return; + + xml_tree_preorder(tree,tree->NextSibling(node)); + xml_tree_preorder(tree,tree->FirstChild(node)); + + +} + + +void xml_tree_time_siblings(XMLTree* tree){ + treeNode first = tree->FirstChild(0); + unsigned long children = tree->NumChildren(first); + unsigned long print = children / 100; + treeNode child; + double max_time = 0; + double max_i = 0; + double time = 0; + std::cerr << children << " children\n"; + std::cerr << "Timings:\n"; + for(unsigned long i = 1; i<= children;i++){ + STARTTIMER(); + for(int j = 0; j < 5; j++) + child = tree->Child(first,i); + STOPTIMER(Parsing); + time = tParsing / 5.0; + if (time >= max_time){ + max_time = time; + max_i = i; + }; + }; + std::cerr << "Peak for node " << max_i << " : " << time << "\n"; + return; + +} + + +void xml_tree_lcps(XMLTree * tree, treeNode node){ + if (node == NULLT) + return; + + xml_tree_lcps(tree,tree->LastChild(node)); + xml_tree_lcps(tree,tree->PrevSibling(node)); +} + +void xml_tree_random_path(XMLTree *tree, treeNode node){ + if (node == NULLT|| tree->IsLeaf(node)) + return; + unsigned long n = (unsigned long) RANDOM(tree->NumChildren(node)); + crc+=n; + xml_tree_random_path(tree,tree->Child(node,1+n)); +} + + +XMLTree* load_srx(const char* file){ + + char buffer[1024]; + char * read_; + int fd = open(file,O_RDONLY); + FILE* fp = fdopen(fd, "r"); + unsigned int u32 = 0; + unsigned char cbuff; + XMLTree* tree; + + if (fp == NULL) { + std::cerr << "Error: " << strerror(errno) << "\n"; + return NULL; + }; + + read_ = fgets(buffer,1023, fp); + + if (read_ == NULL || (strncmp(buffer,SRX_MAGIC_STRING,1023) != 0)){ + std::cerr << "Error: Invalid .srx file\n"; + return NULL; + }; + + read_ = fgets(buffer,1023,fp); + + if (read_ == NULL || (strncmp(buffer,SRX_VERSION,1023) != 0)){ + std::cerr << "Error: Invalid .srx file\n"; + return NULL; + }; + + //OCaml marshaled data structure + //Skip the first 4 bytes = unsigned int 32 + READ_UCHAR(&cbuff,fp); + READ_UCHAR(&cbuff,fp); + READ_UCHAR(&cbuff,fp); + READ_UCHAR(&cbuff,fp); + + READ_UCHAR(&cbuff,fp); + u32 = cbuff << 24; + READ_UCHAR(&cbuff,fp); + u32 += cbuff << 16; + READ_UCHAR(&cbuff,fp); + u32 += cbuff << 8; + READ_UCHAR(&cbuff,fp); + u32 += cbuff; + // u32 is the data size, we need now to skip (data_size + 20) - (4 * 2) bytes + // 20 is the header_size defined in marshal.ml + if (fseek(fp,u32+20-8, SEEK_CUR) == -1){ + std::cerr << "Error: " << strerror(errno) << "\n"; + return NULL; + }; + + // Now we point to the correct area in the file + // We make sure that the fd is at the correct position and call XMLTree::Load + if (lseek(fd,ftell(fp),SEEK_SET) == -1){ + std::cerr << "Error: " << strerror(errno) << "\n"; + return NULL; + }; + //Sampling factor is ignored + tree = XMLTree::Load(fd,true,64); + fclose(fp); //also closes fd! + return tree; + +} + +int main(int argc,char ** argv ){ + XMLTree *tree; + tree_node naive; + if (argc != 2) { + std::cerr << "Usage: " << argv[0] << " file.srx\n"; + exit (1); + }; + + STARTTIMER(); + tree = load_srx(argv[1]); + STOPTIMER(Loading); + PRINTTIME("Loading SRX file from disk",Loading); + + OUTPUT_LINE(); + /* + // Naive pointer structure, allocated in post order + STARTTIMER(); + naive = create_naive(tree,0,1); + STOPTIMER(Building); + PRINTTIME("Building pre-order allocated binary structure",Building); + + STARTTIMER(); + pre_order(naive); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + STARTTIMER(); + nsfs_order(naive); + STOPTIMER(Parsing); + PRINTTIME("NextSibling/FirstChild",Parsing); + + srandom(1); + STARTTIMER(); + for(int i = 0; i < NUM_PATHS; i++) + random_path(naive); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + + STARTTIMER(); + delete_tree_node(naive); + STOPTIMER(Parsing); + PRINTTIME("Freeing structure",Parsing); + + OUTPUT_LINE(); + + // Naive pointer structure, allocated in pre-order + STARTTIMER(); + naive = create_naive(tree,0,0); + STOPTIMER(Building); + PRINTTIME("Building post-order allocated binary pointer structure",Building); + STARTTIMER(); + pre_order(naive); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + STARTTIMER(); + nsfs_order(naive); + STOPTIMER(Parsing); + PRINTTIME("NextSibling/FirstChild",Parsing); + + srandom(1); + STARTTIMER(); + for(int i = 0; i < NUM_PATHS; i++) + random_path(naive); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + + + STARTTIMER(); + delete_tree_node(naive); + STOPTIMER(Parsing); + PRINTTIME("Freeing structure",Parsing); + + OUTPUT_LINE(); + + + + STARTTIMER(); + naive = create_with_child_array(tree,0); + STOPTIMER(Building); + PRINTTIME("Building child array structure",Building); + STARTTIMER(); + pre_order(naive); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + srandom(1); + STARTTIMER(); + for(int i = 0; i < NUM_PATHS; i++) + random_path(naive); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + + std::cerr << "crc = " << crc << "\n"; + STARTTIMER(); + delete_tree_node(naive); + STOPTIMER(Parsing); + PRINTTIME("Freeing structure",Parsing); + + OUTPUT_LINE(); + + + // Naive pointer structure, allocated in pre-order + STARTTIMER(); + naive = create_perfect(tree,1); + STOPTIMER(Building); + PRINTTIME("Building pre-order pre-allocated contiguous binary structure",Building); + STARTTIMER(); + pre_order(naive); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + STARTTIMER(); + nsfs_order(naive); + STOPTIMER(Parsing); + PRINTTIME("NextSibling/FirstChild",Parsing); + + srandom(1); + STARTTIMER(); + for(int i = 0; i < NUM_PATHS; i++) + random_path(naive); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + + STARTTIMER(); + free(naive); + STOPTIMER(Parsing); + PRINTTIME("Freeing structure",Parsing); + + OUTPUT_LINE(); + + // Naive pointer structure, allocated in pre-order + STARTTIMER(); + naive = create_perfect(tree,0); + STOPTIMER(Building); + PRINTTIME("Building post-order pre-allocated contiguous binary structure",Building); + STARTTIMER(); + pre_order(naive); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + STARTTIMER(); + nsfs_order(naive); + STOPTIMER(Parsing); + PRINTTIME("NextSibling/FirstChild",Parsing); + + srandom(1); + STARTTIMER(); + for(int i = 0; i < NUM_PATHS; i++) + random_path(naive); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + std::cerr << "crc = " << crc << "\n"; + STARTTIMER(); + free(naive); + STOPTIMER(Parsing); + PRINTTIME("Freeing structure",Parsing); + + OUTPUT_LINE(); + + // XMLTree structure + std::cerr << "Timing XMLTree structure\n"; + STARTTIMER(); + + xml_tree_preorder(tree,0); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + STARTTIMER(); + xml_tree_nsfc(tree,0); + STOPTIMER(Parsing); + PRINTTIME("NextSibling/FirstChild",Parsing); + + STARTTIMER(); + xml_tree_unranked_preorder(tree,0); + STOPTIMER(Parsing); + PRINTTIME("Unranked pre-order",Parsing); + + srandom(1); + STARTTIMER(); + crc = 0; + for(int i = 0; i < NUM_PATHS; i++) + xml_tree_random_path(tree,0); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + std::cerr << "crc = " << crc << "\n"; + OUTPUT_LINE(); + */ + xml_tree_time_siblings(tree); + + + exit(0); +}; + diff --git a/timeXMLTree.cpp b/timeXMLTree.cpp index 49fd56e..6e8bcec 100644 --- a/timeXMLTree.cpp +++ b/timeXMLTree.cpp @@ -5,6 +5,11 @@ #include #include +#define read32u() \ + (intern_src += 4, \ + ((uintnat)(intern_src[-4]) << 24) + (intern_src[-3] << 16) + \ + (intern_src[-2] << 8) + intern_src[-1]) + using std::cout; using std::string; using std::left;