X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.cpp;h=ecc9a2786ee545c97deb9e4a76b465c2c0d466ce;hb=46dd7565a6c8d813459b81cf2b19446db1b51010;hp=21658e99c323c4aaaff8b72e385d8580542bddf4;hpb=22ee86b6a6088bf49747da202d619439fd8880e2;p=SXSI%2FXMLTree.git diff --git a/XMLTree.cpp b/XMLTree.cpp index 21658e9..ecc9a27 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -570,14 +570,16 @@ treeNode XMLTree::ParentNode(DocID d) // OpenDocument(empty_texts): it starts the construction of the data structure for // the XML document. Parameter empty_texts indicates whether we index empty texts // in document or not. Returns a non-zero value upon success, NULLT in case of error. -int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text) +int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text,bool dtc) { initialized = true; finished = false; + found_attributes = false; npar = 0; parArraySize = 1; - ntagnames = 0; - + ntagnames = 2; + disable_tc = dtc; + indexing_empty_texts = empty_texts; par_aux = (pb *)malloc(sizeof(pb)*parArraySize); @@ -592,7 +594,28 @@ int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text) return NULLT; } - TagName = NULL; + TagName = (unsigned char **) malloc(2*sizeof(unsigned char*)); + if (!TagName){ + fprintf(stderr, "Error: not enough memory\n"); + return NULLT; + } + + TagName[0] = (unsigned char *) malloc(4*sizeof(unsigned char)); + strcpy((char *) TagName[0], "<@>"); + + if (!TagName[0]){ + fprintf(stderr, "Error: not enough memory\n"); + return NULLT; + } + + TagName[1] = (unsigned char *) malloc(4*sizeof(unsigned char)); + if (!TagName[1]){ + fprintf(stderr, "Error: not enough memory\n"); + return NULLT; + } + + strcpy((char *) TagName[1], "<$>"); + if (!indexing_empty_texts) { empty_texts_aux = (unsigned int *)malloc(sizeof(unsigned int)); @@ -633,17 +656,29 @@ int XMLTree::CloseDocument() static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb); static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb); - Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,2*ntagnames, bmb, ssb); + + // If we found an attribute then "<@>" is present in the tree + // if we didn't then it is not. "<$>" is never present in the tree + int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2; + + Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,ntagsize, bmb, ssb); delete bmb; delete pmb; delete ssb; // makes the text collection static - Text->MakeStatic(); + if (!disable_tc) + Text->MakeStatic(); // creates the data structure marking the non-empty texts (just in the case it is necessary) - if (!indexing_empty_texts) + if (!indexing_empty_texts) { EBVector = new static_bitsequence_rrr02((uint *)empty_texts_aux,(ulong)npar,(uint)32); + free (empty_texts_aux); + empty_texts_aux = NULL; + } + + free(tags_aux); + tags_aux = NULL; finished = true; @@ -679,8 +714,13 @@ int XMLTree::NewOpenTag(unsigned char *tagname) // transforms the tagname into a tag identifier. If the tag is new, we insert // it in the table. for (i=0; i") was called + if (i==0) + found_attributes=true; + if (i==ntagnames) { // the tag is a new one, then we insert it TagName = (unsigned char **)realloc(TagName, sizeof(char *)*(ntagnames+1)); @@ -777,6 +817,11 @@ int XMLTree::NewText(unsigned char *s) return NULLT; } + if (disable_tc) { + XMLTree::NewEmptyText(); + return 1; + }; + if (!indexing_empty_texts) { empty_texts_aux = (unsigned int *)realloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb)))); if (!empty_texts_aux) { @@ -846,4 +891,19 @@ unsigned char *XMLTree::GetTagName(TagType tagid) } - +TagType XMLTree::RegisterTag(unsigned char *tagname) +{ + if (!finished) + return NULLT; + + + TagType id = XMLTree::GetTagId(tagname); + if (id == NULLT){ + id = ntagnames; + ntagnames = ntagnames + 1; + TagName = (unsigned char **) realloc(TagName,ntagnames*(sizeof(unsigned char*))); + strcpy((char*)TagName[id], (const char *)tagname); + }; + + return id; +}