23 #define NUM_PATHS 10000
24 #define RANDOM(m) ((unsigned long int) (random() % (m)))
26 #define OUTPUT_LINE() (std::cerr << "------------------------------------\n\n")
27 #define SRX_MAGIC_STRING "SXSI_INDEX\n"
28 #define SRX_VERSION "2\n"
29 #define READ_UCHAR(pchar,file) do { \
30 if (fread((pchar),sizeof(char),1,(file)) == 0) { \
31 std::cerr << "Error: Invalid .srx file\n"; \
38 typedef void ** tree_node;
40 #define SIZE(v) (((unsigned long) ((v)[0])) >> 1)
41 #define ISBINARY(v) (((unsigned long) ((v)[0])) & 1)
42 #define CHILDR(v,i) (((tree_node)(v))[(i)+1])
43 #define CHILD(v,i) ((tree_node) (((tree_node)(v))[(i)+1]))
45 static tree_node create_node(long size,int binary){
46 tree_node node = (tree_node) malloc(sizeof(void*) * (size+1));
49 node[0] = (void *) (size << 1 | binary);
56 tree_node create_naive(XMLTree *tree, treeNode node, int prealloc){
62 self = create_node(2,1);
64 tree_node first = create_naive(tree,tree->FirstChild(node),prealloc);
65 tree_node next = create_naive(tree,tree->NextSibling(node),prealloc);
67 self = create_node(2,1);
68 self[0] = (void *) (tree->NumChildren(node) << 1 | 1);
76 tree_node create_with_child_array(XMLTree *tree, treeNode node){
80 long size = (long) tree->NumChildren(node);
81 tree_node self = create_node(size,0);
82 treeNode child = tree->FirstChild(node);
83 for(int i = 0; i < size; i++){
84 CHILDR(self,i) = create_with_child_array(tree,child);
85 child = tree->NextSibling(child);
92 static unsigned long my_malloc_idx;
94 tree_node my_malloc_create_node(tree_node pool) {
96 ret = &pool[my_malloc_idx * 3];
101 tree_node create_perfect_rec(XMLTree *tree, treeNode node, tree_node pool ,int prealloc ) {
107 self = my_malloc_create_node(pool);
109 tree_node first = create_naive(tree,tree->FirstChild(node),prealloc);
110 tree_node next = create_naive(tree,tree->NextSibling(node),prealloc);
112 self = my_malloc_create_node(pool);
113 self[0] = (void *) (tree->NumChildren(node) << 1 | 1);
122 tree_node create_perfect(XMLTree *tree, int prealloc){
123 tree_node v = (tree_node) malloc(sizeof(void*)*3 * tree->Size());
126 create_perfect_rec(tree,0,v,prealloc);
132 void unranked_preorder(tree_node node){
135 for(unsigned long i = 0; i < SIZE(node); i++)
136 unranked_preorder(CHILD(node,i));
139 void binary_preorder(tree_node node){
143 binary_preorder( (tree_node) (node[1]));
144 binary_preorder( (tree_node) (node[2]));
147 void nsfs_order(tree_node node){
151 binary_preorder( (tree_node) (node[2]));
152 binary_preorder( (tree_node) (node[1]));
157 void pre_order(tree_node node){
161 binary_preorder(node);
163 unranked_preorder(node);
168 tree_node binary_nth_child (tree_node node, unsigned long n){
172 tree_node child = CHILD(node,0);
173 for(unsigned long i = 0; i < n; i ++)
174 child=CHILD(child,1);
180 void binary_random_path(tree_node node){
181 if (node == NULL || SIZE(node) == 0)
183 binary_random_path(binary_nth_child(node,RANDOM(SIZE(node))));
186 unsigned long crc = 0;
188 void unranked_random_path(tree_node node){
189 if (node == NULL || SIZE(node) == 0)
191 unsigned long n = RANDOM(SIZE(node));
193 unranked_random_path(CHILD(node,n));
196 void random_path(tree_node node){
198 binary_random_path(node);
200 unranked_random_path(node);
204 void delete_tree_node(tree_node node){
206 if (ISBINARY(node)) {
207 delete_tree_node((tree_node) (node[1]));
208 delete_tree_node((tree_node) (node[2]));
211 for(unsigned long i = 0; i < SIZE(node); i++)
212 delete_tree_node(CHILD(node,i));
220 void xml_tree_preorder(XMLTree* tree, treeNode node){
224 xml_tree_preorder(tree,tree->FirstChild(node));
225 xml_tree_preorder(tree,tree->NextSibling(node));
228 void xml_tree_unranked_preorder(XMLTree* tree,treeNode node){
229 if (node == NULLT|| tree->IsLeaf(node))
232 for(int i = 1; i <= tree->NumChildren(node); i++)
233 xml_tree_unranked_preorder(tree,tree->Child(node,i));
237 void xml_tree_nsfc(XMLTree* tree, treeNode node){
241 xml_tree_preorder(tree,tree->NextSibling(node));
242 xml_tree_preorder(tree,tree->FirstChild(node));
248 void xml_tree_time_siblings(XMLTree* tree){
249 treeNode first = tree->FirstChild(0);
250 unsigned long children = tree->NumChildren(first);
251 unsigned long print = children / 100;
256 std::cerr << children << " children\n";
257 std::cerr << "Timings:\n";
258 for(unsigned long i = 1; i<= children;i++){
260 for(int j = 0; j < 5; j++)
261 child = tree->Child(first,i);
263 time = tParsing / 5.0;
264 if (time >= max_time){
269 std::cerr << "Peak for node " << max_i << " : " << time << "\n";
275 void xml_tree_lcps(XMLTree * tree, treeNode node){
279 xml_tree_lcps(tree,tree->LastChild(node));
280 xml_tree_lcps(tree,tree->PrevSibling(node));
283 void xml_tree_random_path(XMLTree *tree, treeNode node){
284 if (node == NULLT|| tree->IsLeaf(node))
286 unsigned long n = (unsigned long) RANDOM(tree->NumChildren(node));
288 xml_tree_random_path(tree,tree->Child(node,1+n));
292 XMLTree* load_srx(const char* file){
296 int fd = open(file,O_RDONLY);
297 FILE* fp = fdopen(fd, "r");
298 unsigned int u32 = 0;
303 std::cerr << "Error: " << strerror(errno) << "\n";
307 read_ = fgets(buffer,1023, fp);
309 if (read_ == NULL || (strncmp(buffer,SRX_MAGIC_STRING,1023) != 0)){
310 std::cerr << "Error: Invalid .srx file\n";
314 read_ = fgets(buffer,1023,fp);
316 if (read_ == NULL || (strncmp(buffer,SRX_VERSION,1023) != 0)){
317 std::cerr << "Error: Invalid .srx file\n";
321 //OCaml marshaled data structure
322 //Skip the first 4 bytes = unsigned int 32
323 READ_UCHAR(&cbuff,fp);
324 READ_UCHAR(&cbuff,fp);
325 READ_UCHAR(&cbuff,fp);
326 READ_UCHAR(&cbuff,fp);
328 READ_UCHAR(&cbuff,fp);
330 READ_UCHAR(&cbuff,fp);
332 READ_UCHAR(&cbuff,fp);
334 READ_UCHAR(&cbuff,fp);
336 // u32 is the data size, we need now to skip (data_size + 20) - (4 * 2) bytes
337 // 20 is the header_size defined in marshal.ml
338 if (fseek(fp,u32+20-8, SEEK_CUR) == -1){
339 std::cerr << "Error: " << strerror(errno) << "\n";
343 // Now we point to the correct area in the file
344 // We make sure that the fd is at the correct position and call XMLTree::Load
345 if (lseek(fd,ftell(fp),SEEK_SET) == -1){
346 std::cerr << "Error: " << strerror(errno) << "\n";
349 //Sampling factor is ignored
350 tree = XMLTree::Load(fd,true,64);
351 fclose(fp); //also closes fd!
356 int main(int argc,char ** argv ){
360 std::cerr << "Usage: " << argv[0] << " file.srx\n";
365 tree = load_srx(argv[1]);
367 PRINTTIME("Loading SRX file from disk",Loading);
371 // Naive pointer structure, allocated in post order
373 naive = create_naive(tree,0,1);
375 PRINTTIME("Building pre-order allocated binary structure",Building);
380 PRINTTIME("Preoder traversal",Parsing);
385 PRINTTIME("NextSibling/FirstChild",Parsing);
389 for(int i = 0; i < NUM_PATHS; i++)
393 PRINTTIME("10000 random paths",Parsing);
396 delete_tree_node(naive);
398 PRINTTIME("Freeing structure",Parsing);
402 // Naive pointer structure, allocated in pre-order
404 naive = create_naive(tree,0,0);
406 PRINTTIME("Building post-order allocated binary pointer structure",Building);
410 PRINTTIME("Preoder traversal",Parsing);
415 PRINTTIME("NextSibling/FirstChild",Parsing);
419 for(int i = 0; i < NUM_PATHS; i++)
423 PRINTTIME("10000 random paths",Parsing);
427 delete_tree_node(naive);
429 PRINTTIME("Freeing structure",Parsing);
436 naive = create_with_child_array(tree,0);
438 PRINTTIME("Building child array structure",Building);
442 PRINTTIME("Preoder traversal",Parsing);
446 for(int i = 0; i < NUM_PATHS; i++)
450 PRINTTIME("10000 random paths",Parsing);
452 std::cerr << "crc = " << crc << "\n";
454 delete_tree_node(naive);
456 PRINTTIME("Freeing structure",Parsing);
461 // Naive pointer structure, allocated in pre-order
463 naive = create_perfect(tree,1);
465 PRINTTIME("Building pre-order pre-allocated contiguous binary structure",Building);
469 PRINTTIME("Preoder traversal",Parsing);
474 PRINTTIME("NextSibling/FirstChild",Parsing);
478 for(int i = 0; i < NUM_PATHS; i++)
482 PRINTTIME("10000 random paths",Parsing);
487 PRINTTIME("Freeing structure",Parsing);
491 // Naive pointer structure, allocated in pre-order
493 naive = create_perfect(tree,0);
495 PRINTTIME("Building post-order pre-allocated contiguous binary structure",Building);
499 PRINTTIME("Preoder traversal",Parsing);
504 PRINTTIME("NextSibling/FirstChild",Parsing);
508 for(int i = 0; i < NUM_PATHS; i++)
512 PRINTTIME("10000 random paths",Parsing);
513 std::cerr << "crc = " << crc << "\n";
517 PRINTTIME("Freeing structure",Parsing);
522 std::cerr << "Timing XMLTree structure\n";
525 xml_tree_preorder(tree,0);
527 PRINTTIME("Preoder traversal",Parsing);
530 xml_tree_nsfc(tree,0);
532 PRINTTIME("NextSibling/FirstChild",Parsing);
535 xml_tree_unranked_preorder(tree,0);
537 PRINTTIME("Unranked pre-order",Parsing);
542 for(int i = 0; i < NUM_PATHS; i++)
543 xml_tree_random_path(tree,0);
546 PRINTTIME("10000 random paths",Parsing);
547 std::cerr << "crc = " << crc << "\n";
550 xml_tree_time_siblings(tree);