X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.cpp;h=ae80fc2a489680d270c6fba2d59939a8c9bddc00;hb=b53633fb64f387edb5cebefbb3308b6347b2389c;hp=8dcdaec6904f8e017e064e912fa129ced60a8b57;hpb=c1865639eb1a804ff9363035a2a656b64fd98b9e;p=SXSI%2FXMLTree.git diff --git a/XMLTree.cpp b/XMLTree.cpp index 8dcdaec..ae80fc2 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -211,6 +211,14 @@ XMLTree *XMLTree::Load(unsigned char *filename, int sample_rate_text) s_tree+=2*sizeof(uint)+sizeof(uint)*uint_len(XML_Tree->tags_blen,XML_Tree->tags_len); s_tree+= XML_Tree->Tags->size(); + /// FIXME:UGLY tests! + /*uint * seq = new uint[XML_Tree->tags_len]; + for(uint i=0;i<XML_Tree->tags_len;i++) + seq[i] = get_field(XML_Tree->tags_fix,XML_Tree->tags_blen,i); + cout << "Tags test: " << XML_Tree->Tags->test(seq,XML_Tree->tags_len) << endl; + delete [] seq;*/ + /// End ugly tests + s_text = ftell(fp); // loads the texts @@ -1030,36 +1038,42 @@ int XMLTree::CloseDocument() Par = (bp *)umalloc(sizeof(bp)); bp_construct(Par, npar, par_aux, OPT_DEGREE|0); // creates structure for tags - //static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20); - //static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb); - //static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb); - // If we found an attribute then "<@>" is present in the tree // if we didn't then it is not. "<$>" is never present in the tree - uint max_tag = 0; - for(uint i=0;i<(uint)npar-1;i++) - max_tag = max(max_tag,tags_aux[i]); - max_tag++; - int ntagsize = found_attributes ? 
2*ntagnames-1 : 2*ntagnames - 2; - tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1)); - tags_aux[npar++] = max_tag; - - static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20); + //uint max_tag = 0; + //for(uint i=0;i<(uint)npar-1;i++) + // max_tag = max(max_tag,tags_aux[i]); + //max_tag++; + //tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1)); + //tags_aux[npar++] = max_tag; + //int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2; + int ntagsize = 2*ntagnames + 2; + + //static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20); + //static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb); + //static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb); + static_bitsequence_builder * bmb = new static_bitsequence_builder_sdarray(); alphabet_mapper *am = new alphabet_mapper_none(); - wt_coder * wc = new wt_coder_huff((uint*)tags_aux,npar,am); - Tags = new static_sequence_wvtree((uint*)tags_aux,npar,wc ,bmb, am); - //Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,ntagsize, bmb, ssb); + //wt_coder * wc = new wt_coder_huff((uint*)tags_aux,npar,am); + //Tags = new static_sequence_wvtree((uint*)tags_aux,npar,wc ,bmb, am); + //Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar,ntagsize, bmb, ssb); + Tags = new static_sequence_bs((uint*)tags_aux,npar,am,bmb); + + cout << "Tags test: " << Tags->test((uint*)tags_aux,npar) << endl; + tags_blen = bits(max_tag); tags_len = (uint)npar; tags_fix = new uint[uint_len(tags_blen,tags_len)]; - for(uint i=0;i<(uint)npar-1;i++) + for(uint i=0;i<(uint)npar;i++) set_field(tags_fix,tags_blen,i,tags_aux[i]); delete bmb; //delete pmb; //delete ssb; - // makes the text collection static + + + // makes the text collection static if (!disable_tc) Text->MakeStatic(); @@ -1239,7 +1253,7 @@ TagType XMLTree::GetTagId(unsigned char *tagname) // this should be changed for more 
efficient processing for (i=0; i= ntagnames) return NULL; // invalid tag identifier s = (unsigned char *)umalloc((strlen((const char *)TagName[tagid])+1)*sizeof(unsigned char)); strcpy((char *)s, (const char *)TagName[tagid]); @@ -1261,6 +1275,7 @@ unsigned char *XMLTree::GetTagName(TagType tagid) const unsigned char *XMLTree::GetTagNameByRef(TagType tagid) { + if(tagid==(uint)-1) return NULL; if (tagid >= ntagnames) return NULL; // invalid tag identifier return ((const unsigned char*) TagName[tagid]); }