patches
author fclaude <fclaude@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Wed, 11 Mar 2009 17:05:27 +0000 (17:05 +0000)
committer fclaude <fclaude@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Wed, 11 Mar 2009 17:05:27 +0000 (17:05 +0000)
git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@252 3cdefd35-fc62-479d-8e8d-bae585ffb9ca

XMLTree.cpp
libcds/src/static_sequence/static_sequence.cpp
libcds/src/static_sequence/static_sequence.h
libcds/src/static_sequence/static_sequence_gmr.cpp

index 8dcdaec..9057e31 100644 (file)
@@ -211,6 +211,14 @@ XMLTree *XMLTree::Load(unsigned char *filename, int sample_rate_text)
                s_tree+=2*sizeof(uint)+sizeof(uint)*uint_len(XML_Tree->tags_blen,XML_Tree->tags_len);\r
     s_tree+= XML_Tree->Tags->size();\r
 \r
+               /// FIXME:UGLY tests!\r
+               /*uint * seq = new uint[XML_Tree->tags_len];\r
+               for(uint i=0;i<XML_Tree->tags_len;i++)\r
+                       seq[i] = get_field(XML_Tree->tags_fix,XML_Tree->tags_blen,i);\r
+               cout << "Tags test: " << XML_Tree->Tags->test(seq,XML_Tree->tags_len) << endl;\r
+               delete [] seq;*/\r
+               /// End ugly tests\r
+\r
     s_text = ftell(fp);\r
 \r
     // loads the texts\r
@@ -1030,10 +1038,6 @@ int XMLTree::CloseDocument()
     Par = (bp *)umalloc(sizeof(bp));\r
     bp_construct(Par, npar, par_aux, OPT_DEGREE|0);    \r
     // creates structure for tags\r
-    //static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20);\r
-    //static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb);\r
-    //static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb);\r
-\r
 \r
     // If we found an attribute then "<@>" is present in the tree\r
     // if we didn't then it is not. "<$>" is never present in the tree\r
@@ -1041,25 +1045,34 @@ int XMLTree::CloseDocument()
                for(uint i=0;i<(uint)npar-1;i++)\r
                        max_tag = max(max_tag,tags_aux[i]);\r
                max_tag++;\r
-    int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2;\r
                tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
                tags_aux[npar++] = max_tag;\r
+    //int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2;\r
+    int ntagsize = 2*ntagnames + 2;\r
 \r
+    //static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20);\r
+    //static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb);\r
+    //static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb);\r
                static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20);\r
                alphabet_mapper *am = new alphabet_mapper_none();\r
                wt_coder * wc = new wt_coder_huff((uint*)tags_aux,npar,am);\r
                Tags = new static_sequence_wvtree((uint*)tags_aux,npar,wc ,bmb, am);\r
-    //Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,ntagsize, bmb, ssb);\r
+    //Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar,ntagsize, bmb, ssb);\r
+               \r
+               cout << "Tags test: " << Tags->test((uint*)tags_aux,npar) << endl;\r
+\r
                tags_blen = bits(max_tag);\r
                tags_len = (uint)npar;\r
                tags_fix = new uint[uint_len(tags_blen,tags_len)];\r
-               for(uint i=0;i<(uint)npar-1;i++)\r
+               for(uint i=0;i<(uint)npar;i++)\r
                        set_field(tags_fix,tags_blen,i,tags_aux[i]);\r
     \r
     delete bmb;\r
     //delete pmb;\r
     //delete ssb;\r
-    // makes the text collection static\r
+\r
+    \r
+               // makes the text collection static\r
     if (!disable_tc)\r
       Text->MakeStatic();\r
     \r
index e97a112..05b7753 100644 (file)
@@ -41,3 +41,42 @@ static_sequence * static_sequence::load(FILE * fp) {
   }
   return NULL;
 }
+
+bool static_sequence::test(uint * seq, uint n) {
+       uint sigma = 0;
+       for(uint i=0;i<n;i++)
+               sigma = max(sigma,seq[i]);
+       uint * occ = new uint[sigma+1];
+       for(uint i=0;i<=sigma;i++)
+               occ[i] = 0;
+       for(uint i=0;i<n;i++) {
+               occ[seq[i]]++;
+               if(rank(seq[i],i)!=occ[seq[i]]) {
+                       cout << "rank failed!" << endl;
+                       cout << "rank("<<seq[i]<<","<<i<<")="<<rank(seq[i],i)<<endl;
+                       cout << "expected result: " << occ[seq[i]] << endl;
+                       delete [] occ;
+                       return false;
+               }
+               if(i>0 && rank(seq[i],i-1)!=occ[seq[i]]-1) {
+                       cout << "rank-1 failed!" << endl;
+                       delete [] occ;
+                       return false;
+               }
+               if(select(seq[i],occ[seq[i]])!=i) {
+                       cout << "select failed!" << endl;
+                       cout << "select(" << seq[i] << "," << occ[seq[i]] << ")="<<select(seq[i],occ[seq[i]]) << endl;
+                       cout << "i=" << i << "  rank(" << seq[i] << ",i)=" << rank(seq[i],i) << endl;
+                       delete [] occ;
+                       return false;
+               }
+               if(access(i)!=seq[i]) {
+                       cout << "access failed!" << endl;
+                       delete [] occ;
+                       return false;
+               }
+       }
+       delete [] occ;
+       return true;
+}
+
index f8f3bb5..e3b72dc 100644 (file)
@@ -76,6 +76,8 @@ public:
 
   /** Stores the bitmap given a file pointer, return 0 in case of success */
   virtual uint save(FILE * fp)=0;
+
+       virtual bool test(uint * seq, uint n);
   
   /** Reads a bitmap determining the type */
   static static_sequence * load(FILE * fp);
index ead7748..9203171 100644 (file)
@@ -27,7 +27,7 @@ static_sequence_gmr::static_sequence_gmr(uint * sequence, uint n, uint chunk_len
        uint * new_seq = new uint[len];
   sigma = 0;
        for(uint i=0;i<n;i++){
-               new_seq[i] = sequence[i];
+               new_seq[i] = sequence[i]+1;
     sigma = max(sigma,new_seq[i]);
        }
   sigma++;
@@ -94,19 +94,28 @@ uint * static_sequence_gmr::get_ones(uint * sequence) {
 
 
 uint static_sequence_gmr::rank(uint c, uint j) {
-//   c++;
+  c++;
   uint i = j/chunk_length;
   uint bp = (c)*(len/chunk_length);
   uint rank_pos = B->select0(bp);
   uint prev = rank_pos-bp+1;
   uint sum = B->rank1(B->select0(bp+i)) - prev;
   uint cr = chunk[i]->rank(c,j-i*chunk_length);
+       /*if(c==0) {
+               cout << "c=" << c << " j=" << j << endl;
+               cout << "i=" << i << endl;
+               cout << "bp=" << bp << endl;
+               cout << "rank_pos=" << rank_pos << endl;
+               cout << "prev=" << prev << endl;
+               cout << "sum=" << sum << endl;
+               cout << "cr=" << cr << endl;
+       }*/
   return sum + cr;
 }
 
 
 uint static_sequence_gmr::select(uint c, uint j) {
-//   c++;
+   c++;
   uint rank_pos = B->select0(c*(len/chunk_length));
   uint prev = B->rank1(rank_pos);
   uint sel = prev+j;
@@ -115,12 +124,25 @@ uint static_sequence_gmr::select(uint c, uint j) {
   uint desp = B->rank1(B->select0((i)))-prev;
   if (desp+1==0) desp=0;
   uint rchunk = i%(len/chunk_length);
+       /*if(j==90) {
+               cout << "------------------------------" << endl;
+               cout << "c=" << c << "  j=" << j << endl;
+               cout << "chunk_length=" << chunk_length << endl;
+               cout << "rank_pos=" << rank_pos << endl;
+               cout << "prev=" << prev << endl;
+               cout << "sel=" << sel << endl;
+               cout << "block=" << block << endl;
+               cout << "i=" << i << endl;
+               cout << "desp=" << desp << endl;
+               cout << "rchunk=" << rchunk << endl;
+               cout << "j-desp=" << j-desp << endl;
+       }*/
   return (rchunk*chunk_length)+chunk[rchunk]->select(c, j-desp);
 }
 
 
 uint static_sequence_gmr::access(uint j) {
-  return chunk[j/chunk_length]->access(j%chunk_length);
+  return chunk[j/chunk_length]->access(j%chunk_length)-1;
 }