From add1a8a0b8a7ca05bb099ec38a60f3c52b9dfd71 Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 28 Jan 2009 00:06:34 +0000 Subject: [PATCH] Hardcode "<@>" and "<$>" at position 0 and 1 in the TagName table. These are used to denote attributes and PCDATA nodes. git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@81 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- XMLTree.cpp | 40 ++++++++++++++++++++++++++++++++++++---- XMLTree.h | 1 + 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/XMLTree.cpp b/XMLTree.cpp index 22ba4bb..fe01896 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -574,9 +574,10 @@ int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text) { initialized = true; finished = false; + found_attributes = false; npar = 0; parArraySize = 1; - ntagnames = 0; + ntagnames = 2; indexing_empty_texts = empty_texts; @@ -592,7 +593,28 @@ int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text) return NULLT; } - TagName = NULL; + TagName = (unsigned char **) malloc(2*sizeof(unsigned char*)); + if (!TagName){ + fprintf(stderr, "Error: not enough memory\n"); + return NULLT; + } + + TagName[0] = (unsigned char *) malloc(4*sizeof(unsigned char)); + strcpy((char *) TagName[0], "<@>"); + + if (!TagName[0]){ + fprintf(stderr, "Error: not enough memory\n"); + return NULLT; + } + + TagName[1] = (unsigned char *) malloc(4*sizeof(unsigned char)); + if (!TagName[1]){ + fprintf(stderr, "Error: not enough memory\n"); + return NULLT; + } + + strcpy((char *) TagName[1], "<$>"); + if (!indexing_empty_texts) { empty_texts_aux = (unsigned int *)malloc(sizeof(unsigned int)); @@ -633,7 +655,12 @@ int XMLTree::CloseDocument() static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb); static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb); - Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,2*ntagnames, bmb, ssb); + + // If we found an attribute then "<@>" is present in the tree + // if we didn't 
then it is not. "<$>" is never present in the tree + int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2; + + Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,ntagsize, bmb, ssb); delete bmb; delete pmb; @@ -679,8 +706,13 @@ int XMLTree::NewOpenTag(unsigned char *tagname) // transforms the tagname into a tag identifier. If the tag is new, we insert // it in the table. for (i=0; i<ntagnames; i++) if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break; + + // NewOpenTag("<@>") was called + if (i==0) + found_attributes=true; + if (i==ntagnames) { // the tag is a new one, then we insert it TagName = (unsigned char **)realloc(TagName, sizeof(char *)*(ntagnames+1)); diff --git a/XMLTree.h b/XMLTree.h index 88aae9a..cc5f089 100644 --- a/XMLTree.h +++ b/XMLTree.h @@ -90,6 +90,7 @@ class XMLTree { int parArraySize; int ntagnames; unsigned int *empty_texts_aux; + bool found_attributes; public: -- 2.17.1