X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.cpp;h=40d80555a6838486be8b3b9ac3cfa59a8e9c2867;hb=8b92ac7e539c796ee3160078b5ca30537f26ea51;hp=5033877787b50a6adb4c768732f6a45834a90521;hpb=44c3b5aabb8782b15e66d7d14ab19b280d7eb20f;p=SXSI%2FXMLTree.git diff --git a/XMLTree.cpp b/XMLTree.cpp index 5033877..40d8055 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -66,9 +66,11 @@ static uint fast_get_field(uint* A,int len, int idx) XMLTree::XMLTree( pb * const par, uint npar, vector * const TN, TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags, - TextCollection * const TC, bool dis_tc) + TextCollection * const TC, bool dis_tc, + TextCollectionBuilder::index_type_t _index_type ) { buffer = 0; + print_stack = 0; // creates the data structure for the tree topology Par = (bp *)umalloc(sizeof(bp)); STARTTIMER(); @@ -117,8 +119,7 @@ XMLTree::XMLTree( pb * const par, uint npar, vector * const TN, TagIdM disable_tc = dis_tc; - stream = NULL; - stream_fd = 0; + text_index_type = _index_type; std::cerr << "Number of distinct tags " << TagName->size() << "\n"; //std::cerr.flush(); } @@ -146,11 +147,6 @@ XMLTree::~XMLTree() delete EBVector; EBVector = NULL; - if (stream != NULL){ - fclose(stream); - stream = NULL; - stream_fd = 0; - }; } @@ -167,7 +163,7 @@ void XMLTree::print_stats() } // Save: saves XML tree data structure to file. -void XMLTree::Save(int fd, char *filename) +void XMLTree::Save(int fd) { FILE *fp; char filenameaux[1024]; @@ -199,14 +195,31 @@ void XMLTree::Save(int fd, char *filename) // stores the texts if (!disable_tc) { - Text->Save(fp, filename); - }; - } + ufwrite(&text_index_type, sizeof(TextCollectionBuilder::index_type_t), 1, fp); + + const char * pref; + switch (text_index_type){ + case TextCollectionBuilder::index_type_default: + pref = "default_"; + break; + case TextCollectionBuilder::index_type_swcsa: + pref = "swcsa_"; + break; + case TextCollectionBuilder::index_type_rlcsa: + pref = "rlcsa_"; + break; + }; + + Text->Save(fp, pref); + + + } + } // Load: loads XML tree data structure from file. Returns // a pointer to the loaded data structure -XMLTree *XMLTree::Load(int fd, char *filename, bool load_tc,int sample_factor) +XMLTree *XMLTree::Load(int fd, bool load_tc,int sample_factor) { FILE *fp; @@ -277,32 +290,44 @@ XMLTree *XMLTree::Load(int fd, char *filename, bool load_tc,int sample_factor) ufread(&(XML_Tree->disable_tc), sizeof(bool), 1, fp); if (load_tc) { - XML_Tree->EBVector = static_bitsequence_rrr02::load(fp); - //XML_Tree->EBVector = static_bitsequence_sdarray::load(fp); + XML_Tree->EBVector = static_bitsequence_rrr02::load(fp); - STOPTIMER(Loading); - PRINTTIME("Loading text bitvector struct", Loading); - STARTTIMER(); - - // Not used - // loads the texts - if (!XML_Tree->disable_tc){ - XML_Tree->Text = TextCollection::Load(fp, filename, TextCollection::index_mode_default, sample_factor); - } - else XML_Tree->Text = NULL; - STOPTIMER(Loading); - PRINTTIME("Loading TextCollection", Loading); - STARTTIMER(); + STOPTIMER(Loading); + PRINTTIME("Loading text bitvector struct", Loading); + STARTTIMER(); + + // Not used + // loads the texts + if (!XML_Tree->disable_tc){ + ufread(&(XML_Tree->text_index_type), + sizeof(TextCollectionBuilder::index_type_t), 1, fp); + const char * pref; + switch (!XML_Tree->text_index_type){ + case TextCollectionBuilder::index_type_default: + pref = "default_"; + break; + case TextCollectionBuilder::index_type_swcsa: + pref = "swcsa_"; + break; + case TextCollectionBuilder::index_type_rlcsa: + pref = "rlcsa_"; + break; + }; + XML_Tree->Text = TextCollection::Load(fp, pref, TextCollection::index_mode_default, sample_factor); + + } + else XML_Tree->Text = NULL; + STOPTIMER(Loading); + PRINTTIME("Loading TextCollection", Loading); + STARTTIMER(); } else { XML_Tree->EBVector = NULL; XML_Tree->Text = NULL; XML_Tree->disable_tc = true; }; - - XML_Tree->stream = NULL; - XML_Tree->stream_fd = 0; + return XML_Tree; } @@ -500,12 +525,12 @@ treeNode XMLTree::NextElement(treeNode x) }*/ // LastChild(x): returns the last child of node x. -treeNode XMLTree::LastChild(treeNode x) + /*treeNode XMLTree::LastChild(treeNode x) { NULLT_IF(x == NULLT || fast_isleaf(Par,x)); return find_open(Par, fast_find_close(Par, x)-1); } - + */ // NextSibling(x): returns the next sibling of node x, assuming it exists. /*treeNode XMLTree::NextSibling(treeNode x) { @@ -516,12 +541,12 @@ treeNode XMLTree::LastChild(treeNode x) */ // PrevSibling(x): returns the previous sibling of node x, assuming it exists. -treeNode XMLTree::PrevSibling(treeNode x) +/*treeNode XMLTree::PrevSibling(treeNode x) { NULLT_IF(x==NULLT); return prev_sibling(Par, x); } - +*/ // TaggedChild(x,tag): returns the first child of node x tagged tag, or NULLT if there is none. // Because of the balanced-parentheses representation of the tree, this operation is not supported // efficiently, just iterating among the children of node x until finding the desired child. @@ -801,11 +826,10 @@ DocID XMLTree::MyText(treeNode x) // seems faster than testing EBVector->access(x); if (tag == PCDATA_TAG_ID || tag == ATTRIBUTE_DATA_TAG_ID) - //if (EBVector->access(x)) - return (DocID) (EBVector->rank1(x)-1); //-1 because document ids start from 0 - else + return (DocID) (EBVector->rank1(x)-1); + else return (DocID) NULLT; - + } // MyText(x): returns the document identifier of the text below node x, // or NULLT if x is not a leaf node or the text is empty. Assumes Doc @@ -813,6 +837,7 @@ DocID XMLTree::MyText(treeNode x) DocID XMLTree::MyTextUnsafe(treeNode x) { return (DocID) (EBVector->rank1(x)-1); //-1 because document ids start from 0 + } // TextXMLId(d): returns the preorder of document with identifier d in the tree consisting of // all tree nodes and all text nodes. Assumes that the tree root has preorder 1. @@ -901,19 +926,21 @@ bool XMLTree::IsOpen(treeNode x) { return fast_inspect(Par,x); } void XMLTree::Print(int fd,treeNode x, bool no_text){ - if (buffer == 0) - buffer = new string(); - + if (buffer == 0) { + buffer = new string(BUFFER_ALLOC, 0); + print_stack = new std::vector(); + print_stack->reserve(256); + }; treeNode fin = fast_find_close(Par,x); treeNode n = x; TagType tag = Tag(n); - uchar * tagstr; + range r = DocIds(x); treeNode first_idx; treeNode first_text = (tag == PCDATA_TAG_ID ? x : ParentNode(r.min-1)); treeNode first_att = NULLT; - if (first_att == NULLT) + if (first_att == NULLT) first_idx = first_text; else if (first_text == NULLT) first_idx = first_att; @@ -923,10 +950,9 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ uchar * current_text=NULL; if (first_idx != NULLT) - current_text = GetText(MyText(first_idx)); + current_text = GetText(MyTextUnsafe(first_idx)); size_t read = 0; - std::vector st; while (n <= fin){ if (fast_inspect(Par,n)){ if (tag == PCDATA_TAG_ID) { @@ -940,14 +966,13 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ n+=2; // skip closing $ tag = Tag(n); - } - else { - _dputc('<',fd); - tagstr = (uchar*) GetTagNameByRef(tag); - _dputs((const char*) tagstr, fd); + } else { + + _dputc('<',fd); + _dput_str((*TagName)[tag], fd); n++; if (fast_inspect(Par,n)) { - st.push_back(tagstr); + print_stack->push_back(&((*TagName)[tag])); tag = Tag(n); if (tag == ATTRIBUTE_TAG_ID){ n++; @@ -987,17 +1012,15 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ tag=Tag(n); }; }; - } - else - do { + } else do { _dputs("back()), fd); _dputc('>', fd); - st.pop_back(); + print_stack->pop_back(); n++; - } while (!(fast_inspect(Par,n) || st.empty())); + } while (!(fast_inspect(Par,n) || print_stack->empty())); tag = Tag(n); }; _dputc('\n', fd); - _flush(fd); + //_flush(fd); }