X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=timeSXSI.cpp;fp=timeSXSI.cpp;h=e223a53238f085b63d3b97ead93acec70faa618f;hb=8e1b9299e0c8e1731db61955ef756fa92ed8c615;hp=0000000000000000000000000000000000000000;hpb=1ff2494510cb02d136cbde3a064c0c8c94ec4216;p=SXSI%2Fxpathcomp.git diff --git a/timeSXSI.cpp b/timeSXSI.cpp new file mode 100644 index 0000000..e223a53 --- /dev/null +++ b/timeSXSI.cpp @@ -0,0 +1,555 @@ +#include "XMLTree.h" +#include "Utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "timings.h" + + +using std::cout; +using std::string; +using std::left; +using std::right; +using std::cerr; + + +#define NUM_PATHS 10000 +#define RANDOM(m) ((unsigned long int) (random() % (m))) + +#define OUTPUT_LINE() (std::cerr << "------------------------------------\n\n") +#define SRX_MAGIC_STRING "SXSI_INDEX\n" +#define SRX_VERSION "2\n" +#define READ_UCHAR(pchar,file) do { \ + if (fread((pchar),sizeof(char),1,(file)) == 0) { \ + std::cerr << "Error: Invalid .srx file\n"; \ + return NULL; \ + }; \ + } while(0) \ + + +extern "C" { + typedef void ** tree_node; + +#define SIZE(v) (((unsigned long) ((v)[0])) >> 1) +#define ISBINARY(v) (((unsigned long) ((v)[0])) & 1) +#define CHILDR(v,i) (((tree_node)(v))[(i)+1]) +#define CHILD(v,i) ((tree_node) (((tree_node)(v))[(i)+1])) + + static tree_node create_node(long size,int binary){ + tree_node node = (tree_node) malloc(sizeof(void*) * (size+1)); + if (node) { + binary = binary & 1; + node[0] = (void *) (size << 1 | binary); + node[1] = NULL; + node[2] = NULL; + }; + return node; + } + + tree_node create_naive(XMLTree *tree, treeNode node, int prealloc){ + if (node == NULLT) + return NULL; + else { + tree_node self; + if (prealloc) + self = create_node(2,1); + + tree_node first = create_naive(tree,tree->FirstChild(node),prealloc); + tree_node next = create_naive(tree,tree->NextSibling(node),prealloc); + if (!prealloc) + self = create_node(2,1); + self[0] = (void *) (tree->NumChildren(node) << 1 | 1); + self[1] = first; + self[2] = next; + + return self; + }; + } + + tree_node create_with_child_array(XMLTree *tree, treeNode node){ + if (tree==NULL) + return NULL; + else { + long size = (long) tree->NumChildren(node); + tree_node self = create_node(size,0); + treeNode child = tree->FirstChild(node); + for(int i = 0; i < size; i++){ + CHILDR(self,i) = create_with_child_array(tree,child); + child = tree->NextSibling(child); + }; + return self; + }; + + } + + static unsigned long my_malloc_idx; + + tree_node my_malloc_create_node(tree_node pool) { + tree_node ret; + ret = &pool[my_malloc_idx * 3]; + my_malloc_idx++; + return ret; + } + + tree_node create_perfect_rec(XMLTree *tree, treeNode node, tree_node pool ,int prealloc ) { + if (node == NULLT) + return NULL; + else { + tree_node self; + if (prealloc) + self = my_malloc_create_node(pool); + + tree_node first = create_naive(tree,tree->FirstChild(node),prealloc); + tree_node next = create_naive(tree,tree->NextSibling(node),prealloc); + if (!prealloc) + self = my_malloc_create_node(pool); + self[0] = (void *) (tree->NumChildren(node) << 1 | 1); + self[1] = first; + self[2] = next; + + return self; + }; + + } + + tree_node create_perfect(XMLTree *tree, int prealloc){ + tree_node v = (tree_node) malloc(sizeof(void*)*3 * tree->Size()); + my_malloc_idx = 0; + if (v) + create_perfect_rec(tree,0,v,prealloc); + + return v; + + } + + void unranked_preorder(tree_node node){ + if (node == NULL) + return; + for(unsigned long i = 0; i < SIZE(node); i++) + unranked_preorder(CHILD(node,i)); + }; + + void binary_preorder(tree_node node){ + if (node == NULL) + return; + + binary_preorder( (tree_node) (node[1])); + binary_preorder( (tree_node) (node[2])); + }; + + void nsfs_order(tree_node node){ + if (node == NULL) + return; + + binary_preorder( (tree_node) (node[2])); + binary_preorder( (tree_node) (node[1])); + }; + + + + void pre_order(tree_node node){ + if (node == NULL) + return; + if (ISBINARY(node)) + binary_preorder(node); + else + unranked_preorder(node); + return; + } + + + tree_node binary_nth_child (tree_node node, unsigned long n){ + if (n >= SIZE(node)) + return NULL; + + tree_node child = CHILD(node,0); + for(unsigned long i = 0; i < n; i ++) + child=CHILD(child,1); + return child; + + } + + + void binary_random_path(tree_node node){ + if (node == NULL || SIZE(node) == 0) + return; + binary_random_path(binary_nth_child(node,RANDOM(SIZE(node)))); + } + + unsigned long crc = 0; + + void unranked_random_path(tree_node node){ + if (node == NULL || SIZE(node) == 0) + return; + unsigned long n = RANDOM(SIZE(node)); + crc+=n; + unranked_random_path(CHILD(node,n)); + } + + void random_path(tree_node node){ + if (ISBINARY(node)) + binary_random_path(node); + else + unranked_random_path(node); + + } + + void delete_tree_node(tree_node node){ + if (node) { + if (ISBINARY(node)) { + delete_tree_node((tree_node) (node[1])); + delete_tree_node((tree_node) (node[2])); + } + else + for(unsigned long i = 0; i < SIZE(node); i++) + delete_tree_node(CHILD(node,i)); + + free(node); + }; + } + +} + +void xml_tree_preorder(XMLTree* tree, treeNode node){ + if (node == NULLT) + return; + + xml_tree_preorder(tree,tree->FirstChild(node)); + xml_tree_preorder(tree,tree->NextSibling(node)); + +} +void xml_tree_unranked_preorder(XMLTree* tree,treeNode node){ + if (node == NULLT|| tree->IsLeaf(node)) + return; + + for(int i = 1; i <= tree->NumChildren(node); i++) + xml_tree_unranked_preorder(tree,tree->Child(node,i)); + +} + +void xml_tree_nsfc(XMLTree* tree, treeNode node){ + if (node == NULLT) + return; + + xml_tree_preorder(tree,tree->NextSibling(node)); + xml_tree_preorder(tree,tree->FirstChild(node)); + + +} + + +void xml_tree_time_siblings(XMLTree* tree){ + treeNode first = tree->FirstChild(0); + unsigned long children = tree->NumChildren(first); + unsigned long print = children / 100; + treeNode child; + double max_time = 0; + double max_i = 0; + double time = 0; + std::cerr << children << " children\n"; + std::cerr << "Timings:\n"; + for(unsigned long i = 1; i<= children;i++){ + STARTTIMER(); + for(int j = 0; j < 5; j++) + child = tree->Child(first,i); + STOPTIMER(Parsing); + time = tParsing / 5.0; + if (time >= max_time){ + max_time = time; + max_i = i; + }; + }; + std::cerr << "Peak for node " << max_i << " : " << time << "\n"; + return; + +} + + +void xml_tree_lcps(XMLTree * tree, treeNode node){ + if (node == NULLT) + return; + + xml_tree_lcps(tree,tree->LastChild(node)); + xml_tree_lcps(tree,tree->PrevSibling(node)); +} + +void xml_tree_random_path(XMLTree *tree, treeNode node){ + if (node == NULLT|| tree->IsLeaf(node)) + return; + unsigned long n = (unsigned long) RANDOM(tree->NumChildren(node)); + crc+=n; + xml_tree_random_path(tree,tree->Child(node,1+n)); +} + + +XMLTree* load_srx(const char* file){ + + char buffer[1024]; + char * read_; + int fd = open(file,O_RDONLY); + FILE* fp = fdopen(fd, "r"); + unsigned int u32 = 0; + unsigned char cbuff; + XMLTree* tree; + + if (fp == NULL) { + std::cerr << "Error: " << strerror(errno) << "\n"; + return NULL; + }; + + read_ = fgets(buffer,1023, fp); + + if (read_ == NULL || (strncmp(buffer,SRX_MAGIC_STRING,1023) != 0)){ + std::cerr << "Error: Invalid .srx file\n"; + return NULL; + }; + + read_ = fgets(buffer,1023,fp); + + if (read_ == NULL || (strncmp(buffer,SRX_VERSION,1023) != 0)){ + std::cerr << "Error: Invalid .srx file\n"; + return NULL; + }; + + //OCaml marshaled data structure + //Skip the first 4 bytes = unsigned int 32 + READ_UCHAR(&cbuff,fp); + READ_UCHAR(&cbuff,fp); + READ_UCHAR(&cbuff,fp); + READ_UCHAR(&cbuff,fp); + + READ_UCHAR(&cbuff,fp); + u32 = cbuff << 24; + READ_UCHAR(&cbuff,fp); + u32 += cbuff << 16; + READ_UCHAR(&cbuff,fp); + u32 += cbuff << 8; + READ_UCHAR(&cbuff,fp); + u32 += cbuff; + // u32 is the data size, we need now to skip (data_size + 20) - (4 * 2) bytes + // 20 is the header_size defined in marshal.ml + if (fseek(fp,u32+20-8, SEEK_CUR) == -1){ + std::cerr << "Error: " << strerror(errno) << "\n"; + return NULL; + }; + + // Now we point to the correct area in the file + // We make sure that the fd is at the correct position and call XMLTree::Load + if (lseek(fd,ftell(fp),SEEK_SET) == -1){ + std::cerr << "Error: " << strerror(errno) << "\n"; + return NULL; + }; + //Sampling factor is ignored + tree = XMLTree::Load(fd,true,64); + fclose(fp); //also closes fd! + return tree; + +} + +int main(int argc,char ** argv ){ + XMLTree *tree; + tree_node naive; + if (argc != 2) { + std::cerr << "Usage: " << argv[0] << " file.srx\n"; + exit (1); + }; + + STARTTIMER(); + tree = load_srx(argv[1]); + STOPTIMER(Loading); + PRINTTIME("Loading SRX file from disk",Loading); + + OUTPUT_LINE(); + /* + // Naive pointer structure, allocated in post order + STARTTIMER(); + naive = create_naive(tree,0,1); + STOPTIMER(Building); + PRINTTIME("Building pre-order allocated binary structure",Building); + + STARTTIMER(); + pre_order(naive); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + STARTTIMER(); + nsfs_order(naive); + STOPTIMER(Parsing); + PRINTTIME("NextSibling/FirstChild",Parsing); + + srandom(1); + STARTTIMER(); + for(int i = 0; i < NUM_PATHS; i++) + random_path(naive); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + + STARTTIMER(); + delete_tree_node(naive); + STOPTIMER(Parsing); + PRINTTIME("Freeing structure",Parsing); + + OUTPUT_LINE(); + + // Naive pointer structure, allocated in pre-order + STARTTIMER(); + naive = create_naive(tree,0,0); + STOPTIMER(Building); + PRINTTIME("Building post-order allocated binary pointer structure",Building); + STARTTIMER(); + pre_order(naive); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + STARTTIMER(); + nsfs_order(naive); + STOPTIMER(Parsing); + PRINTTIME("NextSibling/FirstChild",Parsing); + + srandom(1); + STARTTIMER(); + for(int i = 0; i < NUM_PATHS; i++) + random_path(naive); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + + + STARTTIMER(); + delete_tree_node(naive); + STOPTIMER(Parsing); + PRINTTIME("Freeing structure",Parsing); + + OUTPUT_LINE(); + + + + STARTTIMER(); + naive = create_with_child_array(tree,0); + STOPTIMER(Building); + PRINTTIME("Building child array structure",Building); + STARTTIMER(); + pre_order(naive); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + srandom(1); + STARTTIMER(); + for(int i = 0; i < NUM_PATHS; i++) + random_path(naive); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + + std::cerr << "crc = " << crc << "\n"; + STARTTIMER(); + delete_tree_node(naive); + STOPTIMER(Parsing); + PRINTTIME("Freeing structure",Parsing); + + OUTPUT_LINE(); + + + // Naive pointer structure, allocated in pre-order + STARTTIMER(); + naive = create_perfect(tree,1); + STOPTIMER(Building); + PRINTTIME("Building pre-order pre-allocated contiguous binary structure",Building); + STARTTIMER(); + pre_order(naive); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + STARTTIMER(); + nsfs_order(naive); + STOPTIMER(Parsing); + PRINTTIME("NextSibling/FirstChild",Parsing); + + srandom(1); + STARTTIMER(); + for(int i = 0; i < NUM_PATHS; i++) + random_path(naive); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + + STARTTIMER(); + free(naive); + STOPTIMER(Parsing); + PRINTTIME("Freeing structure",Parsing); + + OUTPUT_LINE(); + + // Naive pointer structure, allocated in pre-order + STARTTIMER(); + naive = create_perfect(tree,0); + STOPTIMER(Building); + PRINTTIME("Building post-order pre-allocated contiguous binary structure",Building); + STARTTIMER(); + pre_order(naive); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + STARTTIMER(); + nsfs_order(naive); + STOPTIMER(Parsing); + PRINTTIME("NextSibling/FirstChild",Parsing); + + srandom(1); + STARTTIMER(); + for(int i = 0; i < NUM_PATHS; i++) + random_path(naive); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + std::cerr << "crc = " << crc << "\n"; + STARTTIMER(); + free(naive); + STOPTIMER(Parsing); + PRINTTIME("Freeing structure",Parsing); + + OUTPUT_LINE(); + + // XMLTree structure + std::cerr << "Timing XMLTree structure\n"; + STARTTIMER(); + + xml_tree_preorder(tree,0); + STOPTIMER(Parsing); + PRINTTIME("Preoder traversal",Parsing); + + STARTTIMER(); + xml_tree_nsfc(tree,0); + STOPTIMER(Parsing); + PRINTTIME("NextSibling/FirstChild",Parsing); + + STARTTIMER(); + xml_tree_unranked_preorder(tree,0); + STOPTIMER(Parsing); + PRINTTIME("Unranked pre-order",Parsing); + + srandom(1); + STARTTIMER(); + crc = 0; + for(int i = 0; i < NUM_PATHS; i++) + xml_tree_random_path(tree,0); + + STOPTIMER(Parsing); + PRINTTIME("10000 random paths",Parsing); + std::cerr << "crc = " << crc << "\n"; + OUTPUT_LINE(); + */ + xml_tree_time_siblings(tree); + + + exit(0); +}; +