\r
// XMLTree constructor: assembles the tree object from already-parsed components.\r
// NOTE(review): this text appears to be a patch fragment -- lines starting with\r
// "- " look like the removed (old) revision, lines starting with "+ " like the\r
// added (new) revision, and unprefixed lines are shared by both.\r
//   par, npar        - passed to bp_construct to build Par\r
//   TN               - tag-name vector; only its size is read in the lines shown here\r
//   tim              - not referenced in the lines shown here\r
//   empty_texts_bmp  - bitmap used to build EBVector; released with free() below\r
//   tags             - passed (with npar) to static_sequence_bs to build Tags\r
//   TC / TCB         - old revision: TextCollection stored directly in Text;\r
//                      new revision: builder queried via InitTextCollection() and then deleted\r
//   dis_tc           - stored in disable_tc; when true the new revision sets Text to NULL\r
//   _index_type      - stored in text_index_type\r
XMLTree::XMLTree( pb * const par, uint npar, vector<string> * const TN, TagIdMap * const tim,\r
uint *empty_texts_bmp, TagType *tags,\r
- TextCollection * const TC, bool dis_tc,\r
+ TextCollectionBuilder * const TCB, bool dis_tc,\r
TextCollectionBuilder::index_type_t _index_type )\r
{\r
buffer = 0;\r
print_stack = 0;\r
// creates the data structure for the tree topology\r
STARTTIMER();\r
// The new revision drops the OPT_FAST_PREORDER_SELECT flag and keeps only OPT_DEGREE.\r
- Par = bp_construct(npar, (pb*) par, OPT_FAST_PREORDER_SELECT | OPT_DEGREE|0);\r
+ Par = bp_construct(npar, (pb*) par, OPT_DEGREE|0);\r
STOPTIMER(Building);\r
PRINTTIME("Building parenthesis struct", Building);\r
STARTTIMER();\r
\r
// NOTE(review): max_tag is computed but not used in the lines shown here.\r
uint max_tag = TN->size() - 1;\r
\r
-\r
// Tag sequence: built from the tags array with an sdarray bitsequence builder\r
// and the "none" alphabet mapper.\r
// NOTE(review): bmb and am are allocated with new and not deleted in the lines\r
// shown; whether static_sequence_bs keeps or copies them is not visible -- confirm.\r
static_bitsequence_builder *bmb = new static_bitsequence_builder_sdarray();\r
alphabet_mapper *am = new alphabet_mapper_none();\r
Tags = new static_sequence_bs((uint*)tags,npar,am,bmb);\r
STOPTIMER(Building);\r
PRINTTIME("Building Tag Structure", Building);\r
\r
- Text = (TextCollection*) TC;\r
-\r
-\r
// Build EBVector from the caller's bitmap, then release the bitmap.\r
EBVector = new static_bitsequence_rrr02(empty_texts_bmp,npar,32);\r
- //EBVector = new static_bitsequence_sdarray(empty_texts_bmp,npar);\r
free(empty_texts_bmp);\r
// Only clears this function's copy of the pointer; the caller's pointer is unchanged.\r
empty_texts_bmp = NULL;\r
\r
\r
disable_tc = dis_tc;\r
text_index_type = _index_type;\r
// New revision: unless text support is disabled, obtain the TextCollection from\r
// the builder and delete the builder afterwards (so the caller must not reuse it).\r
+ if (!disable_tc) {\r
+ assert(TCB != 0);\r
+ STARTTIMER();\r
+ Text = TCB->InitTextCollection();\r
+ delete TCB;\r
+ STOPTIMER(Building);\r
+ PRINTTIME("Building TextCollection", Building);\r
+\r
+ } else {\r
+ Text = NULL;\r
+ }\r
+\r
// NOTE(review): TagName is dereferenced here but is not assigned in the lines\r
// shown; presumably it is set in lines outside this view -- confirm.\r
std::cerr << "Number of distinct tags " << TagName->size() << "\n";\r
//std::cerr.flush();\r
}\r
{\r
// NOTE(review): the signature of this function is not part of the lines shown;\r
// `fd` and `name` are presumably its parameters -- confirm against the full file.\r
// Lines prefixed "- " / "+ " look like the old / new revision of a patch.\r
FILE *fp;\r
int i;\r
-\r
- fp = fdopen(fd, "wa");\r
// New revision: read fd's current offset, wrap a dup() of fd in a stdio stream\r
// and seek that stream to the same offset. The fclose(fp) at the end therefore\r
// closes the duplicate descriptor rather than fd itself.\r
// NOTE(review): the results of lseek/dup/fdopen/fseek are not checked here.\r
+ off_t pos = lseek(fd, 0, SEEK_CUR);\r
+ int fd2 = dup(fd);\r
+ fp = fdopen(fd2, "w");\r
+ fseek(fp, pos, SEEK_SET);\r
// first stores the tree topology\r
saveTree(Par, fp);\r
\r
\r
//text positions\r
EBVector->save(fp);\r
-\r
// Diagnostic: stream position after EBVector has been saved.\r
+ std::cerr << "TC Index position: " << ftell(fp) << std::endl;\r
// stores the texts\r
if (!disable_tc) {\r
-\r
// Diagnostic; the message ends in "\n" and is followed by std::endl, so a blank\r
// line is printed after it.\r
+ std::cerr << "Writing " << sizeof(TextCollectionBuilder::index_type_t) << " bytes\n" << std::endl;\r
// The index type is written to the stream ahead of any text data.\r
ufwrite(&text_index_type, sizeof(TextCollectionBuilder::index_type_t), 1, fp);\r
\r
\r
// Derive a per-index-type file name from `name`; the new revision uses an\r
// underscore suffix ("_default", ...) where the old one used a dot (".default", ...).\r
string file(name);\r
switch (text_index_type){\r
case TextCollectionBuilder::index_type_default:\r
- file.append(".default");\r
+ file.append("_default");\r
break;\r
case TextCollectionBuilder::index_type_swcsa:\r
- file.append(".swcsa");\r
+ file.append("_swcsa");\r
break;\r
case TextCollectionBuilder::index_type_rlcsa:\r
- file.append(".rlcsa");\r
+ file.append("_rlcsa");\r
break;\r
};\r
// NOTE(review): `file` is not used after the switch in the lines shown;\r
// presumably the text collection is saved under that name in lines outside\r
// this view -- confirm.\r
\r
\r
\r
}\r
// New revision: flush and close the stdio stream created above.\r
+ fflush(fp);\r
+ fclose(fp);\r
}\r
\r
// Load: loads XML tree data structure from file. Returns\r
STOPTIMER(Loading);\r
PRINTTIME("Loading text bitvector struct", Loading);\r
STARTTIMER();\r
-\r
+ std::cerr << "TC Load Index position: " << ftell(fp) << std::endl;\r
// Not used\r
// loads the texts\r
if (!XML_Tree->disable_tc){\r
string file(name);\r
switch (XML_Tree->text_index_type){\r
case TextCollectionBuilder::index_type_default:\r
- file.append(".default");\r
+ file.append("_default");\r
break;\r
case TextCollectionBuilder::index_type_swcsa:\r
- file.append(".swcsa");\r
+ file.append("_swcsa");\r
break;\r
case TextCollectionBuilder::index_type_rlcsa:\r
- file.append(".rlcsa");\r
+ file.append("_rlcsa");\r
break;\r
};\r
+\r
+\r
XML_Tree->Text = TextCollection::Load(fp, file.c_str(), TextCollection::index_mode_default, sample_factor);\r
\r
}\r