-#include "common.h"\r
-#include "XMLTree.h"\r
-#include "timings.h"\r
-#include <errno.h>\r
-using std::cout;\r
-using std::endl;\r
-using std::min;\r
-using std::string;\r
-\r
-// functions to convert tag positions to the corresponding tree node and viceversa.\r
-// These are implemented in order to be able to change the tree and Tags representations,\r
-// without affecting the code so much.\r
-// Current implementation corresponds to balanced-parentheses representation for\r
-// the tree, and storing 2 tags per tree node (opening and closing tags).\r
-\r
-\r
-static int bits8 (int t ) {\r
- int r = bits(t);\r
- if (r <= 8)\r
- return 8;\r
- else if (r <= 16)\r
- return 16;\r
- else\r
- return r;\r
-}\r
-\r
-\r
-\r
-static treeNode fast_sibling(bp* Par,treeNode x,TagType tag){\r
-\r
- if (tag == PCDATA_TAG_ID){\r
- x = x+2;\r
- return bp_inspect(Par,x)==OP ? x : NULLT;\r
- } else return bp_next_sibling(Par,x);\r
-\r
-}\r
-\r
-\r
-\r
-\r
-inline uint get_field_no_power(uint *A, uint len, uint index) {\r
-\r
- register uint i=index*len/W, j=index*len-W*i;\r
- return (j+len <= W) ? (A[i] << (W-j-len)) >> (W-len) : (A[i] >> j) | (A[i+1] << (WW-j-len)) >> (W-len);\r
-\r
-}\r
-\r
-static uint fast_get_field(uint* A,int len, int idx)\r
-{\r
- uint f1, f2;\r
- switch (len) {\r
- case 8:\r
- return (uint) (((uchar*)A)[idx]);\r
- case 16:\r
- f2 = ((unsigned short*)A)[idx];\r
- f1 = ((unsigned short*)A)[idx+1];\r
- return (f1 << 16) + f2;\r
- default:\r
- return get_field_no_power (A,len,idx);\r
- };\r
-\r
-}\r
-\r
-\r
-\r
-\r
-XMLTree::XMLTree( pb * const par, uint npar, vector<string> * const TN, TagIdMap * const tim,\r
- uint *empty_texts_bmp, TagType *tags,\r
- TextCollectionBuilder * const TCB, bool dis_tc,\r
- TextCollectionBuilder::index_type_t _index_type )\r
- {\r
- buffer = 0;\r
- print_stack = 0;\r
- // creates the data structure for the tree topology\r
- STARTTIMER();\r
- Par = bp_construct(npar, (pb*) par, OPT_DEGREE|0);\r
- STOPTIMER(Building);\r
- PRINTTIME("Building parenthesis struct", Building);\r
- STARTTIMER();\r
-\r
-\r
- // creates structure for tags\r
-\r
- TagName = (vector<string>*)TN;\r
- tIdMap = (TagIdMap *) tim;\r
-\r
- uint max_tag = TN->size() - 1;\r
-\r
- static_bitsequence_builder *bmb = new static_bitsequence_builder_sdarray();\r
- alphabet_mapper *am = new alphabet_mapper_none();\r
- Tags = new static_sequence_bs((uint*)tags,npar,am,bmb);\r
-\r
- //cout << "Tags test: " << Tags->test((uint*)tags,npar) << endl;\r
-\r
- //Ensures that for small tag numbers, we are on an 8bit boundary.\r
- //Makes tag access way faster with negligeable waste of space.\r
- tags_blen = bits8(max_tag);\r
- std::cerr << "Tags blen is " << tags_blen << "\n";\r
- tags_len = (uint)npar;\r
- tags_fix = new uint[uint_len(tags_blen,tags_len)];\r
- for(uint i=0;i<(uint)npar;i++)\r
- set_field(tags_fix,tags_blen,i,tags[i]);\r
- delete bmb;\r
- free(tags);\r
- tags = NULL;\r
-\r
- STOPTIMER(Building);\r
- PRINTTIME("Building Tag Structure", Building);\r
-\r
- EBVector = new static_bitsequence_rrr02(empty_texts_bmp,npar,32);\r
- free(empty_texts_bmp);\r
- empty_texts_bmp = NULL;\r
-\r
-\r
- disable_tc = dis_tc;\r
- text_index_type = _index_type;\r
- if (!disable_tc) {\r
- assert(TCB != 0);\r
- STARTTIMER();\r
- Text = TCB->InitTextCollection();\r
- delete TCB;\r
- STOPTIMER(Building);\r
- PRINTTIME("Building TextCollection", Building);\r
-\r
- } else {\r
- Text = NULL;\r
- }\r
-\r
- std::cerr << "Number of distinct tags " << TagName->size() << "\n";\r
- //std::cerr.flush();\r
- }\r
-\r
-\r
-// ~XMLTree: frees memory of XML tree.\r
-XMLTree::~XMLTree()\r
- {\r
- int i;\r
-\r
- bp_delete(Par);\r
- Par = NULL;\r
-\r
- delete tIdMap;\r
- tIdMap = NULL;\r
-\r
- delete TagName;\r
- TagName = NULL;\r
-\r
- delete Tags;\r
- Tags = NULL;\r
-\r
- delete Text;\r
- Text = NULL;\r
-\r
- delete EBVector;\r
- EBVector = NULL;\r
-\r
- }\r
-\r
-\r
-void XMLTree::print_stats()\r
- {\r
- uint total_space = Tags->size()+sizeof(static_sequence*);\r
- total_space += sizeof(uint*)+sizeof(uint)*(2+uint_len(tags_blen,tags_len));\r
- cout << "Space usage for XMLTree:" << endl\r
- << " - tags static_sequence: " << Tags->size()+sizeof(static_sequence*) << endl\r
- << " - tags access array: " << sizeof(uint*)+sizeof(uint)*(2+uint_len(tags_blen,tags_len)) << endl\r
- << " ... add Diego structures ... " << endl\r
- << " *total* " << total_space << endl;\r
- }\r
-\r
-// Save: saves XML tree data structure to file.\r
-void XMLTree::Save(int fd, char * name)\r
- {\r
- FILE *fp;\r
- int i;\r
- off_t pos = lseek(fd, 0, SEEK_CUR);\r
- int fd2 = dup(fd);\r
- fp = fdopen(fd2, "w");\r
- fseek(fp, pos, SEEK_SET);\r
- // first stores the tree topology\r
- saveTree(Par, fp);\r
-\r
- // stores the table with tag names\r
- int ntags = TagName->size();\r
-\r
- ufwrite(&ntags, sizeof(int), 1, fp);\r
- for (i = 0; i<ntags;i++)\r
- fprintf(fp, "%s\n",TagName->at(i).c_str());\r
-\r
-\r
- // stores the tags\r
- Tags->save(fp);\r
- ufwrite(&tags_blen,sizeof(uint),1,fp);\r
- ufwrite(&tags_len,sizeof(uint),1,fp);\r
- ufwrite(tags_fix,sizeof(uint),uint_len(tags_blen,tags_len),fp);\r
-\r
- // flags\r
- ufwrite(&disable_tc, sizeof(bool),1,fp);\r
-\r
- //text positions\r
- EBVector->save(fp);\r
- std::cerr << "TC Index position: " << ftell(fp) << std::endl;\r
- // stores the texts\r
- if (!disable_tc) {\r
- std::cerr << "Writing " << sizeof(TextCollectionBuilder::index_type_t) << " bytes\n" << std::endl;\r
- ufwrite(&text_index_type, sizeof(TextCollectionBuilder::index_type_t), 1, fp);\r
-\r
-\r
- string file(name);\r
- switch (text_index_type){\r
- case TextCollectionBuilder::index_type_default:\r
- file.append("_default");\r
- break;\r
- case TextCollectionBuilder::index_type_swcsa:\r
- file.append("_swcsa");\r
- break;\r
- case TextCollectionBuilder::index_type_rlcsa:\r
- file.append("_rlcsa");\r
- break;\r
- };\r
-\r
- Text->Save(fp, file.c_str());\r
-\r
-\r
- }\r
- fflush(fp);\r
- fclose(fp);\r
- }\r
-\r
-// Load: loads XML tree data structure from file. Returns\r
-// a pointer to the loaded data structure\r
-XMLTree *XMLTree::Load(int fd, bool load_tc,int sample_factor, char * name)\r
- {\r
-\r
- FILE *fp;\r
- char buffer[1024];\r
- XMLTree *XML_Tree;\r
- int i;\r
-\r
- buffer[1023] = '\0';\r
-\r
- fp = fdopen(fd, "r");\r
-\r
- XML_Tree = new XMLTree();\r
- STARTTIMER();\r
- // Load the tree structure\r
- XML_Tree->Par = loadTree(fp);\r
- STOPTIMER(Loading);\r
- PRINTTIME("Loading parenthesis struct", Loading);\r
- STARTTIMER();\r
-\r
- XML_Tree->TagName = new std::vector<std::string>();\r
- XML_Tree->tIdMap = new std::unordered_map<std::string,int>();\r
- std::string s;\r
- int ntags;\r
-\r
- // Load the tag names\r
- ufread(&ntags, sizeof(int), 1, fp);\r
-\r
- for (i=0; i<ntags;i++) {\r
- if (fgets(buffer,1022,fp) != buffer)\r
- throw "Cannot read tag list";\r
- s = buffer;\r
- // remove the trailing \n\r
- s.erase(s.size()-1);\r
- XML_Tree->TagName->push_back(s);\r
- XML_Tree->tIdMap->insert(std::make_pair(s,i));\r
-\r
- };\r
- STOPTIMER(Loading);\r
- PRINTTIME("Loading tag names struct", Loading);\r
- STARTTIMER();\r
-\r
- // loads the tag structure\r
- XML_Tree->Tags = static_sequence::load(fp);\r
- ufread(&XML_Tree->tags_blen,sizeof(uint),1,fp);\r
- std::cerr << "tags_blen is "<< XML_Tree->tags_blen <<"\n";\r
- ufread(&XML_Tree->tags_len,sizeof(uint),1,fp);\r
- XML_Tree->tags_fix = new uint[uint_len(XML_Tree->tags_blen,XML_Tree->tags_len)];\r
- ufread(XML_Tree->tags_fix,sizeof(uint),uint_len(XML_Tree->tags_blen,XML_Tree->tags_len),fp);\r
-\r
- // TODO ask francisco about this\r
- /// FIXME:UGLY tests!\r
- //uint * seq = new uint[XML_Tree->tags_len];\r
- //for(uint i=0;i<XML_Tree->tags_len;i++)\r
- // seq[i] = get_field(XML_Tree->tags_fix,XML_Tree->tags_blen,i);\r
- //cout << "Tags test: " << XML_Tree->Tags->test(seq,XML_Tree->tags_len) << endl;\r
- //XML_Tree->Tags->test(seq,XML_Tree->tags_len);\r
- //delete [] seq;\r
- /// End ugly tests\r
-\r
- STOPTIMER(Loading);\r
- std::cerr << (uint_len(XML_Tree->tags_blen,XML_Tree->tags_len)*sizeof(uint))/(1024*1024) << " MB for tag sequence" << std::endl;\r
- PRINTTIME("Loading tag struct", Loading);\r
- STARTTIMER();\r
-\r
- // loads the flags\r
-\r
- ufread(&(XML_Tree->disable_tc), sizeof(bool), 1, fp);\r
- if (load_tc) {\r
- XML_Tree->EBVector = static_bitsequence_rrr02::load(fp);\r
-\r
- STOPTIMER(Loading);\r
- PRINTTIME("Loading text bitvector struct", Loading);\r
- STARTTIMER();\r
- std::cerr << "TC Load Index position: " << ftell(fp) << std::endl;\r
- // Not used\r
- // loads the texts\r
- if (!XML_Tree->disable_tc){\r
- ufread(&(XML_Tree->text_index_type),\r
- sizeof(TextCollectionBuilder::index_type_t), 1, fp);\r
- string file(name);\r
- switch (XML_Tree->text_index_type){\r
- case TextCollectionBuilder::index_type_default:\r
- file.append("_default");\r
- break;\r
- case TextCollectionBuilder::index_type_swcsa:\r
- file.append("_swcsa");\r
- break;\r
- case TextCollectionBuilder::index_type_rlcsa:\r
- file.append("_rlcsa");\r
- break;\r
- };\r
-\r
-\r
- XML_Tree->Text = TextCollection::Load(fp, file.c_str(), TextCollection::index_mode_default, sample_factor);\r
-\r
- }\r
- else XML_Tree->Text = NULL;\r
- STOPTIMER(Loading);\r
- PRINTTIME("Loading TextCollection", Loading);\r
- STARTTIMER();\r
- }\r
- else {\r
- XML_Tree->EBVector = NULL;\r
- XML_Tree->Text = NULL;\r
- XML_Tree->disable_tc = true;\r
- };\r
-\r
-\r
- return XML_Tree;\r
- }\r
-\r
-\r
-\r
-\r
-int XMLTree::SubtreeElements(treeNode x)\r
- {\r
-\r
- int size = bp_subtree_size(Par, x);\r
- if (x == Root()){\r
- x = bp_first_child(Par,x);\r
- size = size - 1;\r
- };\r
-\r
- int s = x + 2*size - 1;\r
- int ntext = Tags->rank(PCDATA_TAG_ID, s) - Tags->rank(PCDATA_TAG_ID, node2tagpos(x)-1);\r
- size = size - ntext;\r
- treeNode fin = bp_find_close(Par,x);\r
- treeNode y = Tags->select_next(ATTRIBUTE_TAG_ID,node2tagpos(x));\r
- while (y != NULLT && y < fin){\r
- size -= SubtreeSize(y);\r
- y = Tags->select_next(ATTRIBUTE_TAG_ID, node2tagpos(y));\r
- };\r
- return size;\r
- }\r
-\r
-// IsLeaf(x): returns whether node x is leaf or not. In the succinct representation\r
-// this is just a bit inspection.\r
-bool XMLTree::IsLeaf(treeNode x)\r
- {\r
- NULLT_IF(x==NULLT);\r
- return bp_isleaf(Par, x);\r
- }\r
-\r
-// IsAncestor(x,y): returns whether node x is ancestor of node y.\r
-bool XMLTree::IsAncestor(treeNode x, treeNode y)\r
- {\r
- return bp_is_ancestor(Par, x, y);\r
- }\r
-\r
-// IsChild(x,y): returns whether node x is parent of node y.\r
-bool XMLTree::IsChild(treeNode x, treeNode y)\r
- {\r
- if (!bp_is_ancestor(Par, x, y)) return false;\r
- return bp_depth(Par, x) == (bp_depth(Par, y) + 1);\r
- }\r
-\r
-\r
-// NumChildren(x): number of children of node x. Constant time with the data structure\r
-// of Sadakane.\r
-int XMLTree::NumChildren(treeNode x)\r
- {\r
- return bp_degree(Par, x);\r
- }\r
-\r
-// ChildNumber(x): returns i if node x is the i-th children of its parent.\r
-int XMLTree::ChildNumber(treeNode x)\r
- {\r
- return bp_child_rank(Par, x);\r
- }\r
-\r
-// Depth(x): depth of node x, a simple binary rank on the parentheses sequence.\r
-int XMLTree::Depth(treeNode x)\r
- {\r
- return bp_depth(Par, x);\r
- }\r
-\r
-// Preorder(x): returns the preorder number of node x, just counting the tree\r
-// nodes (i.e., tags, it disregards the texts in the tree).\r
-int XMLTree::Preorder(treeNode x)\r
- {\r
- return bp_preorder_rank(Par, x);\r
- }\r
-\r
-// Postorder(x): returns the postorder number of node x, just counting the tree\r
-// nodes (i.e., tags, it disregards the texts in the tree).\r
-int XMLTree::Postorder(treeNode x)\r
- {\r
- return bp_postorder_rank(Par, x);\r
- }\r
-\r
-// DocIds(x): returns the range of text identifiers that descend from node x.\r
-// returns {NULLT, NULLT} when there are no texts descending from x.\r
-range XMLTree::DocIds(treeNode x)\r
- {\r
- range r;\r
- if (x == NULLT) {\r
- r.min = NULLT;\r
- r.max = NULLT;\r
- return r;\r
- };\r
- int min = EBVector->rank1(x-1);\r
- int max = EBVector->rank1(x+2*bp_subtree_size(Par, x)-2);\r
- if (min==max) { // range is empty, no texts within the subtree of x\r
- r.min = NULLT;\r
- r.max = NULLT;\r
- }\r
- else { // the range is non-empty, there are texts within the subtree of x\r
- r.min = min+1;\r
- r.max = max;\r
- }\r
- return r;\r
- }\r
-\r
-\r
-// Child(x,i): returns the i-th child of node x, assuming it exists.\r
-treeNode XMLTree::Child(treeNode x, int i)\r
-{\r
- if (i <= OPTD) return bp_naive_child(Par, x, i);\r
- else return bp_child(Par, x, i);\r
-}\r
-\r
-\r
-treeNode XMLTree::SelectChild(treeNode x, TagIdSet *tags)\r
-{\r
-\r
- NULLT_IF(x==NULLT || bp_isleaf(Par,x));\r
- int i;\r
- treeNode child = bp_first_child(Par, x);\r
- TagType t;\r
- while (child != NULLT) {\r
- t = Tag(child);\r
- if (tags->find(t) != tags->end()) return child;\r
- child = fast_sibling(Par, child,t);\r
- }\r
- return NULLT;\r
-}\r
-\r
-\r
-treeNode XMLTree::SelectFollowingSibling(treeNode x, TagIdSet *tags)\r
-{\r
-\r
- NULLT_IF(x==NULLT);\r
- int i;\r
- TagType t;\r
- treeNode sibling = bp_next_sibling(Par, x);\r
- while (sibling != NULLT) {\r
- t = Tag(sibling);\r
- if (tags->find(t) != tags->end()) return sibling;\r
- sibling = fast_sibling(Par, sibling,t);\r
- }\r
- return NULLT;\r
- }\r
-\r
-\r
-// TaggedPrec(x,tag): returns the first node tagged tag with smaller preorder than x and not an\r
-// ancestor of x. Returns NULLT if there is none.\r
-treeNode XMLTree::TaggedPreceding(treeNode x, TagType tag)\r
- {\r
- int r, s;\r
- treeNode node_s, root;\r
- r = (int)Tags->rank(tag, node2tagpos(x)-1);\r
- if (r==0) return NULLT; // there is no such node.\r
- s = (int)Tags->select(tag, r);\r
- root = bp_root_node(Par);\r
- node_s = tagpos2node(s);\r
- while (bp_is_ancestor(Par, node_s, x) && (node_s!=root)) { // the one that we found is an ancestor of x\r
- r--;\r
- if (r==0) return NULLT; // there is no such node\r
- s = (int)Tags->select(tag, r); // we should use select_prev instead when provided\r
- node_s = tagpos2node(s);\r
- }\r
- return NULLT; // there is no such node\r
- }\r
-\r
-\r
-// TaggedFoll(x,tag): returns the first node tagged tag with larger preorder than x and not in\r
-// the subtree of x. Returns NULLT if there is none.\r
-treeNode XMLTree::TaggedFollowing(treeNode x, TagType tag)\r
- {\r
- NULLT_IF (x ==NULLT || x == Root());\r
- return tagpos2node(Tags->select_next(tag, bp_find_close(Par, x)));\r
-\r
- }\r
-\r
-\r
-/* Here we inline TaggedFoll to find the min globally, and only at the end\r
- we check if the min is below the context node */\r
-treeNode XMLTree::SelectFollowingBelow(treeNode x, TagIdSet *tags, treeNode ancestor)\r
- {\r
-\r
- NULLT_IF(x==NULLT || x==Root());\r
-\r
- treeNode close = bp_find_close(Par,x);\r
- treeNode ns = close+1;\r
- if ( (bp_inspect(Par,ns) == OP) && (tags->find(Tag(ns)) != tags->end()))\r
- return ns;\r
-\r
- int i;\r
- treeNode min = NULLT;\r
- treeNode aux;\r
-\r
-\r
- TagIdSet::const_iterator tagit;\r
- for (tagit = tags->begin(); tagit != tags->end(); ++tagit) {\r
-\r
- aux = tagpos2node(Tags->select_next(*tagit, close));\r
- if (aux == NULLT) continue;\r
- if ((min == NULLT) || (aux < min)) min = aux;\r
- };\r
-\r
- // found the smallest node in preorder which is after x.\r
- // if ctx is the root node, just return what we found.\r
-\r
- if (ancestor == Root() || min == NULLT || min < bp_find_close(Par, ancestor)) return min;\r
- else return NULLT;\r
-\r
- }\r
-// TaggedAncestor(x, tag): returns the closest ancestor of x tagged tag. Return\r
-// NULLT is there is none.\r
-treeNode XMLTree::TaggedAncestor(treeNode x, TagType tag)\r
- {\r
- if (x == NULLT || x == Root())\r
- return NULLT;\r
-\r
- treeNode s = bp_parent(Par, x), r = Root();\r
- while (s != r) {\r
- if (Tag(s) == tag) return s;\r
- s = bp_parent(Par, s);\r
- }\r
- return NULLT;\r
- }\r
-\r
-\r
-\r
-// MyText(x): returns the document identifier of the text below node x,\r
-// or NULLT if x is not a leaf node or the text is empty. Assumes Doc\r
-// ids start from 0.\r
-DocID XMLTree::MyText(treeNode x)\r
-{\r
- TagType tag = Tag(x);\r
- // seems faster than testing EBVector->access(x);\r
-\r
- if (tag == PCDATA_TAG_ID || tag == ATTRIBUTE_DATA_TAG_ID)\r
- return (DocID) (EBVector->rank1(x)-1);\r
- else\r
- return (DocID) NULLT;\r
-\r
-}\r
-// MyText(x): returns the document identifier of the text below node x,\r
-// or NULLT if x is not a leaf node or the text is empty. Assumes Doc\r
-// ids start from 0.\r
-DocID XMLTree::MyTextUnsafe(treeNode x)\r
-{\r
- return (DocID) (EBVector->rank1(x)-1); //-1 because document ids start from 0\r
-\r
-}\r
-// TextXMLId(d): returns the preorder of document with identifier d in the tree consisting of\r
-// all tree nodes and all text nodes. Assumes that the tree root has preorder 1.\r
-int XMLTree::TextXMLId(DocID d)\r
- {\r
- NULLT_IF(d == NULLT);\r
- int s = EBVector->select1(d+1);\r
- return bp_rank_open(Par, s) + d + 1; // +1 because root has preorder 1\r
-\r
- }\r
-\r
-// NodeXMLId(x): returns the preorder of node x in the tree consisting\r
-// of all tree nodes and all text nodes. Assumes that the tree root has\r
-// preorder 0;\r
-int XMLTree::NodeXMLId(treeNode x)\r
- {\r
- NULLT_IF(x == NULLT);\r
- if (x == Root()) return 1; // root node has preorder 1\r
- return bp_rank_open(Par, x) + EBVector->rank1(x-1);\r
- }\r
-\r
-// ParentNode(d): returns the parent node of document identifier d.\r
-treeNode XMLTree::ParentNode(DocID d)\r
- {\r
- NULLT_IF (d == NULLT);\r
- return (treeNode) EBVector->select1(d+1);\r
- }\r
-\r
-// GetTagId: returns the tag identifier corresponding to a given tag name.\r
-// Returns NULLT in case that the tag name does not exists.\r
-TagType XMLTree::GetTagId(unsigned char *tagname)\r
- {\r
-\r
- string s = (char *) tagname;\r
- TagIdMapIT it = tIdMap->find(s);\r
- return (TagType) ((it != tIdMap->end()) ? it->second : -1);\r
-\r
- }\r
-\r
-\r
-// GetTagName(tagid): returns the tag name of a given tag identifier.\r
-// Returns NULL in case that the tag identifier is not valid.\r
-unsigned char *XMLTree::GetTagName(TagType tagid)\r
- {\r
- unsigned char *s;\r
- if ( tagid < 0 || tagid >= TagName->size())\r
- return (unsigned char *) "<INVALID TAG>";\r
- strcpy((char *)s, (*TagName)[tagid].c_str());\r
-\r
- return (s == NULL ? (unsigned char*) "<INVALID TAG>" : s);\r
- }\r
-\r
-\r
-const unsigned char *XMLTree::GetTagNameByRef(TagType tagid)\r
- {\r
-\r
- unsigned char *s;\r
- if ( tagid < 0 || tagid >= TagName->size())\r
- return (unsigned char *) "<INVALID TAG>";\r
-\r
- return (const unsigned char *) (*TagName)[tagid].c_str();\r
-\r
- }\r
-\r
-\r
-\r
-TagType XMLTree::RegisterTag(unsigned char *tagname)\r
- {\r
- TagType id = XMLTree::GetTagId(tagname);\r
- if (id == NULLT) {\r
- string s = (char *) tagname;\r
- REGISTER_TAG(TagName,tIdMap,s);\r
- };\r
-\r
- return id;\r
- }\r
-\r
-\r
-treeNode XMLTree::Closing(treeNode x) {\r
- return bp_find_close(Par,x);\r
-}\r
-bool XMLTree::IsOpen(treeNode x) { return bp_inspect(Par,x); }\r
-\r
-//WARNING this uses directly the underlying implementation for plain text\r
-\r
-\r
-void XMLTree::Print(int fd,treeNode x, bool no_text){\r
-\r
- if (buffer == 0) {\r
- buffer = new string(BUFFER_ALLOC, 0);\r
- buffer->clear();\r
- print_stack = new std::vector<string *>();\r
- print_stack->reserve(256);\r
- };\r
-\r
- treeNode fin = bp_find_close(Par,x);\r
- treeNode n = x;\r
- TagType tag = Tag(n);\r
-\r
- range r = DocIds(x);\r
- treeNode first_idx;\r
- treeNode first_text = (tag == PCDATA_TAG_ID ? x : ParentNode(r.min-1));\r
- treeNode first_att = NULLT;\r
-\r
- if (first_att == NULLT)\r
- first_idx = first_text;\r
- else if (first_text == NULLT)\r
- first_idx = first_att;\r
- else\r
- first_idx = min(first_att,first_text);\r
-\r
- uchar * current_text=NULL;\r
-\r
- if (first_idx != NULLT)\r
- current_text = GetText(MyTextUnsafe(first_idx));\r
- uchar * orig_text = current_text;\r
- size_t read = 0;\r
-\r
- while (n <= fin){\r
- if (bp_inspect(Par,n)){\r
- if (tag == PCDATA_TAG_ID) {\r
-\r
- if (no_text)\r
- _dputs("<$/>", fd);\r
- else {\r
- read = _dprintf((const char*) current_text, fd);\r
- current_text += (read + 1);\r
- };\r
- n+=2; // skip closing $\r
- tag = Tag(n);\r
-\r
- } else {\r
-\r
- _dputc('<',fd);\r
- _dput_str((*TagName)[tag], fd);\r
- n++;\r
- if (bp_inspect(Par,n)) {\r
- print_stack->push_back(&((*TagName)[tag]));\r
- tag = Tag(n);\r
- if (tag == ATTRIBUTE_TAG_ID){\r
- n++;\r
- if (no_text) _dputs("><@@>",fd);\r
-\r
- while (bp_inspect(Par,n)){\r
- if (no_text) {\r
- _dputc('<', fd);\r
- _dputs((const char*) &(GetTagNameByRef(Tag(n))[3]), fd);\r
- _dputc('>', fd);\r
- _dputs("<$@/></", fd);\r
- _dputs((const char*) &(GetTagNameByRef(Tag(n))[3]), fd);\r
- _dputc('>', fd);\r
- n+= 4;\r
- } else {\r
- _dputc(' ', fd);\r
- _dputs((const char*) &(GetTagNameByRef(Tag(n))[3]), fd);\r
- n++;\r
- _dputs("=\"", fd);\r
- read = _dprintf((const char*) current_text, fd);\r
- current_text += (read + 1);\r
- _dputc('"', fd);\r
- n+=3;\r
- }\r
- };\r
- if (no_text) _dputs("</@@>", fd);\r
- else _dputc('>', fd);\r
- n++;\r
- tag=Tag(n);\r
-\r
- } else\r
- _dputc('>', fd);\r
-\r
- } else {// <foo /> tag\r
- _dputs("/>", fd);\r
- n++;\r
- tag=Tag(n);\r
- };\r
- };\r
- } else do {\r
- _dputs("</", fd);\r
- _dput_str(*(print_stack->back()), fd);\r
- _dputc('>', fd);\r
- print_stack->pop_back();\r
- n++;\r
- } while (!(bp_inspect(Par,n) || print_stack->empty()));\r
- tag = Tag(n);\r
- };\r
- _dputc('\n', fd);\r
- if (orig_text && text_index_type != (TextCollectionBuilder::index_type_default))\r
- Text->DeleteText(orig_text);\r
- //_flush(fd);\r
-}\r