-\r
-// OpenDocument(empty_texts, sample_rate_text, dtc): starts the construction of the
-// data structure for the XML document.
-//   empty_texts      - true when empty texts are inserted into the text collection;
-//                      false when they are only recorded in an auxiliary bitmap.
-//   sample_rate_text - sampling rate handed to TextCollection::InitTextCollection().
-//   dtc              - stored in disable_tc; when true, NewText() does not insert the
-//                      text and CloseDocument() does not call Text->MakeStatic().
-// Always returns 1 (success): this method has no error path of its own.
-int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text,bool dtc)
- {
-    // construction state
-    initialized = true;
-    finished = false;
-    found_attributes = false;
-    disable_tc = dtc;
-    indexing_empty_texts = empty_texts;
-
-    // parenthesis sequence: starts empty, with room for a single word
-    npar = 0;
-    parArraySize = 1;
-    par_aux = (pb *)umalloc(sizeof(pb)*parArraySize);
-
-    // tag sequence: grown one element at a time by NewOpenTag/NewClosingTag
-    tags_aux = (TagType *) umalloc(sizeof(TagType));
-
-    // tag name table: identifiers 0 and 1 are reserved for "<@>" and "<$>".
-    // sizeof on the literal includes the terminating '\0'.
-    ntagnames = 2;
-    TagName = (unsigned char **) umalloc(2*sizeof(unsigned char*));
-
-    TagName[0] = (unsigned char *) umalloc(sizeof("<@>"));
-    strcpy((char *) TagName[0], "<@>");
-
-    TagName[1] = (unsigned char *) umalloc(sizeof("<$>"));
-    strcpy((char *) TagName[1], "<$>");
-
-    // bitmap marking the non-empty texts, only needed when empty texts are not indexed
-    if (!indexing_empty_texts) {
-       empty_texts_aux = (unsigned int *)umalloc(sizeof(unsigned int));
-       empty_texts_aux[0] = 0;   // start from a known state instead of indeterminate bits
-    }
-    else
-       empty_texts_aux = NULL;   // never used in this mode; do not leave it dangling
-
-    Text = TextCollection::InitTextCollection((unsigned)sample_rate_text);
-
-    return 1; // indicates success in the initialization of the data structure
- }
-\r
-// CloseDocument(): finishes the construction of the data structure for the XML
-// document. Tree and tags are represented in their final form, the auxiliary
-// construction buffers are released, and the flag "finished" is set to true. After
-// that, the data structure can be queried.
-// Returns 1 on success. Returns NULLT (after printing a message to stderr) when
-// OpenDocument() has not been called, when the document has already been closed,
-// or when no tag has been inserted.
-int XMLTree::CloseDocument()
- {
-    if (!initialized) { // data structure has not been initialized properly
-       fprintf(stderr, "Error: data structure has not been initialized properly (by calling method OpenDocument)\n");
-       return NULLT;
-    }
-
-    if (finished) { // tags_aux was already released by a previous call
-       fprintf(stderr, "Error: the document has already been closed (CloseDocument was called twice)\n");
-       return NULLT;
-    }
-
-    if (npar == 0) { // the tag sequence length below is npar-1, which would wrap around
-       fprintf(stderr, "Error: you cannot close a document that contains no tags\n");
-       return NULLT;
-    }
-
-    // resizes the parenthesis buffer to 1 + npar/(bits per word) words
-    par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));
-
-    // creates the data structure for the tree topology
-    Par = (bp *)umalloc(sizeof(bp));
-    bp_construct(Par, npar, par_aux, OPT_DEGREE|0);
-
-    // creates structure for tags
-    static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20);
-    static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb);
-    static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb);
-
-    // If we found an attribute then "<@>" is present in the tree
-    // if we didn't then it is not. "<$>" is never present in the tree
-    int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2;
-
-    Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,ntagsize, bmb, ssb);
-
-    // the builders are only needed while Tags is being constructed
-    delete bmb;
-    delete pmb;
-    delete ssb;
-
-    // makes the text collection static
-    if (!disable_tc)
-       Text->MakeStatic();
-
-    // creates the data structure marking the non-empty texts (just in the case it is necessary)
-    if (!indexing_empty_texts) {
-       EBVector = new static_bitsequence_rrr02((uint *)empty_texts_aux,(ulong)npar,(uint)32);
-       free (empty_texts_aux);
-       empty_texts_aux = NULL;
-    }
-
-    // the tag buffer is no longer needed once Tags is built (it used to be leaked)
-    free(tags_aux);
-    tags_aux = NULL;
-
-    finished = true;
-
-    return 1; // indicates success in the finalization
- }
-\r
-\r
-// NewOpenTag(tagname): event raised for every opening tag found in the document.
-// Appends an opening parenthesis to the tree sequence and the identifier of
-// tagname to the tag sequence, registering the name first when it is unknown.
-// Returns 1 upon success. Returns NULLT when OpenDocument() has not been called
-// or when the tag name table cannot be enlarged.
-int XMLTree::NewOpenTag(unsigned char *tagname)
- {
-    if (!initialized) { // data structure has not been initialized properly
-       fprintf(stderr, "Error: you cannot insert a new opening tag without first calling method OpenDocument first\n");
-       return NULLT;
-    }
-
-    const char *name = (const char *)tagname;
-
-    // the parenthesis buffer is full: double its capacity
-    if (sizeof(pb)*8*parArraySize == npar) {
-       par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);
-       parArraySize *= 2;
-    }
-
-    setbit(par_aux,npar,OP); // marks a new opening parenthesis
-
-    // looks the name up in the table; tag_id ends at ntagnames when it is unknown
-    int tag_id = 0;
-    while (tag_id < ntagnames && strcmp(name, (const char *)TagName[tag_id]) != 0)
-       tag_id++;
-
-    // identifier 0 is "<@>": NewOpenTag("<@>") was called
-    if (tag_id == 0)
-       found_attributes = true;
-
-    // unknown name: append a copy of it to the table
-    if (tag_id == ntagnames) {
-       TagName = (unsigned char **)urealloc(TagName, sizeof(char *)*(ntagnames+1));
-
-       if (!TagName) {
-          fprintf(stderr, "Error: not enough memory\n");
-          return NULLT;
-       }
-
-       ntagnames++;
-       TagName[tag_id] = (unsigned char *)umalloc(sizeof(unsigned char)*(strlen(name)+1));
-       strcpy((char *)TagName[tag_id], name);
-    }
-
-    // makes room for one more identifier and stores it at the current position
-    tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));
-    tags_aux[npar] = tag_id;
-
-    npar++;
-
-    return 1;
- }
-\r
-\r
-// NewClosingTag(tagname): indicates the event of finding a new closing tag in the
-// document. The tag name is given without the leading '/'; closing tags are stored
-// in the tag table as "/" followed by the name. Appends a closing parenthesis to
-// the tree sequence and the identifier of the closing tag to the tag sequence.
-// Returns 1 upon success. Returns NULLT when OpenDocument() has not been called
-// or when the tag name table cannot be enlarged.
-int XMLTree::NewClosingTag(unsigned char *tagname)
- {
-    int i;
-
-    if (!initialized) { // data structure has not been initialized properly
-       fprintf(stderr, "Error: you cannot insert a new closing tag without first calling method OpenDocument first\n");
-       return NULLT;
-    }
-
-    // inserts a new closing parentheses in the bit sequence
-    if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
-       par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);
-       parArraySize *= 2;
-    }
-
-    setbit(par_aux,npar,CP); // marks a new closing parenthesis
-
-    // transforms the tagname into a tag identifier. If the tag is new, we insert
-    // it in the table. The '/' test comes first so that TagName[i]+1 is only
-    // dereferenced for entries that have at least one character; this avoids
-    // reading past the terminator of an empty registered name.
-    for (i=0; i<ntagnames; i++)
-       if ((TagName[i][0]=='/') && (strcmp((const char *)tagname,(const char *)(TagName[i]+1))==0)) break;
-
-    if (i==ntagnames) { // the tag is a new one, then we insert it
-       TagName = (unsigned char **)urealloc(TagName, sizeof(char *)*(ntagnames+1));
-
-       if (!TagName) { // same check as in NewOpenTag
-          fprintf(stderr, "Error: not enough memory\n");
-          return NULLT;
-       }
-
-       ntagnames++;
-       // one extra byte for the leading '/' and one for the terminating '\0'
-       TagName[i] = (unsigned char *)umalloc(sizeof(char)*(strlen((const char *)tagname)+2));
-       TagName[i][0] = '/';
-       strcpy((char *)&(TagName[i][1]), (const char *)tagname);
-    }
-
-    tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1));
-
-    tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags
-
-    npar++;
-
-    return 1; // success
- }
-\r
-\r
-// NewText(s): indicates the event of finding a new (non-empty) text s in the document.
-// When disable_tc is set the event is handled exactly like NewEmptyText(). Otherwise
-// the text is inserted into the text collection and appended to CachedText, and,
-// when empty texts are not indexed, bit npar-1 of the auxiliary bitmap is set.
-// Returns a non-zero value upon success. Returns NULLT when OpenDocument() has not
-// been called, or when the bitmap has to be updated but no tag has been inserted yet.
-int XMLTree::NewText(unsigned char *s)
- {
-    if (!initialized) { // data structure has not been initialized properly
-       fprintf(stderr, "Error: you cannot insert a new text without first calling method OpenDocument first\n");
-       return NULLT;
-    }
-
-    // text collection disabled: the text is only accounted for as an empty one
-    if (disable_tc)
-       return XMLTree::NewEmptyText();
-
-    if (!indexing_empty_texts) {
-       if (npar == 0) { // npar-1 below would wrap around to a huge bit index
-          fprintf(stderr, "Error: you cannot insert a new text before inserting a tag\n");
-          return NULLT;
-       }
-
-       // NOTE(review): the buffer is sized with sizeof(pb) although it holds unsigned int
-       // words - presumably both have the same size; confirm. Words added by urealloc
-       // are not cleared here, so bits that are never set or cleaned explicitly are
-       // indeterminate.
-       empty_texts_aux = (unsigned int *)urealloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb))));
-       bitset(empty_texts_aux, npar-1);  // marks the non-empty text with a 1 in the bit vector
-    }
-
-    Text->InsertText(s);
-    CachedText.push_back(string((char *) s));
-
-    return 1; // success
- }
-\r
-// NewEmptyText(): indicates the event of finding a new empty text in the document.
-// In case of indexing empty and non-empty texts, we insert the empty text into the
-// text collection. In case of indexing only non-empty texts, it just clears bit
-// npar-1 in the bit vector of empty texts.
-// Returns a non-zero value upon success. Returns NULLT when OpenDocument() has not
-// been called, or when the bit vector has to be updated but no tag has been
-// inserted yet.
-int XMLTree::NewEmptyText()
- {
-    if (!initialized) { // data structure has not been initialized properly
-       fprintf(stderr, "Error: you cannot insert a new empty text without first calling method OpenDocument first\n");
-       return NULLT;
-    }
-
-    if (!indexing_empty_texts) {
-       if (npar == 0) { // npar-1 below would wrap around to a huge bit index
-          fprintf(stderr, "Error: you cannot insert a new empty text before inserting a tag\n");
-          return NULLT;
-       }
-
-       // NOTE(review): the buffer is sized with sizeof(pb) although it holds unsigned int
-       // words - presumably both have the same size; confirm.
-       empty_texts_aux = (unsigned int *)urealloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb))));
-
-       bitclean(empty_texts_aux, npar-1);  // marks the empty text with a 0 in the bit vector
-    }
-    else {
-       // we insert the empty text just in case we index all the texts
-       unsigned char c = 0;
-       Text->InsertText(&c);
-    }
-
-    return 1; // success
- }
-\r
-\r