X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=SWCSAWrapper.h;fp=SWCSAWrapper.h;h=8081ec750b5a67a2422ec5e575c5ee46be850ec4;hb=13e254b7c0ee22dffbc7c3125cee0408f9b375da;hp=731fc834394a59053ecc7893097cf8a82baba8e4;hpb=e4b6bdc7cc2a1372e4d4dae50acac55cebcc7e9b;p=SXSI%2FTextCollection.git diff --git a/SWCSAWrapper.h b/SWCSAWrapper.h index 731fc83..8081ec7 100644 --- a/SWCSAWrapper.h +++ b/SWCSAWrapper.h @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (C) 2010 by Niko Välimäki * * * - * FMIndex implementation for the TextCollection interface * + * SWCSA implementation for the TextCollection interface * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * @@ -57,7 +57,7 @@ class SWCSAWrapper : public SXSI::TextCollection { public: SWCSAWrapper(uchar * text, ulong length, unsigned samplerate, unsigned numberOfTexts_) - : index(0), offsets(0), n(length), seSize(0), numberOfTexts(numberOfTexts_) + : index(0), offsets(0), offit(0), n(length), seSize(0), numberOfTexts(numberOfTexts_) { // Inicializes the arrays used to detect if a char is valid or not. StartValid(); @@ -125,7 +125,9 @@ public: fprintf(stderr,"\n-----------------------\nnumber of words = %lu\n------------------------\n", seSize); encoder.setBit(seSize); offsets = new CSA::DeltaVector(encoder, seSize+1); - std::cerr << "Number of texts: " << numberOfTexts << " vs " << offsets->rank(seSize - 1) << std::endl; + offit = new CSA::DeltaVector::Iterator(*offsets); + + std::cerr << "Number of texts: " << numberOfTexts << " vs " << offit->rank(seSize - 1) << std::endl; char opt[100]; snprintf(opt, 99, "sA=%u;sAinv=%u;sPsi=%u", samplerate, samplerate, samplerate); @@ -147,6 +149,7 @@ public: std::exit(r); } index = 0; + delete offit; offit = 0; delete offsets; offsets = 0; } @@ -182,8 +185,8 @@ public: { ulong from, to, l; uchar *text; - from = offsets->select(i); - to = offsets->select(i+1); // ADD one 1-bit in to end!!! + from = offit->select(i); + to = offit->select(i+1); // ADD one 1-bit in to end!!! int r = extractWords(index, from, to, &text, &l); if (r) @@ -249,7 +252,7 @@ public: document_result dr; dr.reserve(numocc+1); for (ulong i = 0; i < numocc; ++i) - dr.push_back(offsets->rank(occ[i])-1); + dr.push_back(offit->rank(occ[i])-1); free(occ); return dr; @@ -272,7 +275,7 @@ public: // Index from/to disk SWCSAWrapper(FILE *file, char const *filename) - : index(0), offsets(0), n(0), seSize(0), numberOfTexts(0) + : index(0), offsets(0), offit(0), n(0), seSize(0), numberOfTexts(0) { uchar verFlag = 0; if (std::fread(&verFlag, 1, 1, file) != 1) @@ -287,7 +290,8 @@ public: if (std::fread(&(this->numberOfTexts), sizeof(unsigned), 1, file) != 1) throw std::runtime_error("FMIndex::Load(): file read error (numberOfTexts)."); - offsets = new CSA::DeltaVector(file); + offsets = new CSA::DeltaVector(file); + offit = new CSA::DeltaVector::Iterator(*offsets); // FIXME Const correctness is broken! int r = load_index((char *)filename, &index); @@ -331,6 +335,7 @@ public: private: void *index; CSA::DeltaVector *offsets; + CSA::DeltaVector::Iterator *offit; TextPosition n; ulong seSize;