From: kim Date: Tue, 19 Apr 2011 08:16:19 +0000 (+0000) Subject: Rewrite printing function to make it faster. Now also handles X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2FXMLTree.git;a=commitdiff_plain;h=44c3b5aabb8782b15e66d7d14ab19b280d7eb20f Rewrite printing function to make it faster. Now also handles printing <, >, &apos, " and & instead of <, >, ', ", & git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@1052 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/XMLTree.cpp b/XMLTree.cpp index fd2c6de..5033877 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -309,12 +309,13 @@ XMLTree *XMLTree::Load(int fd, char *filename, bool load_tc,int sample_factor) // SubtreeSize(x): the number of nodes (and attributes) in the subtree of node x. -int XMLTree::SubtreeSize(treeNode x) +/*int XMLTree::SubtreeSize(treeNode x) { return subtree_size(Par, x); } - +*/ // SubtreeTags(x,tag): the number of occurrences of tag within the subtree of node x. +/* int XMLTree::SubtreeTags(treeNode x, TagType tag) { if (x == Root()) @@ -325,6 +326,7 @@ int XMLTree::SubtreeTags(treeNode x, TagType tag) return (Tags->rank(tag, s) - Tags->rank(tag, node2tagpos(x)-1))+1; } +*/ int XMLTree::SubtreeElements(treeNode x) { @@ -598,7 +600,7 @@ treeNode XMLTree::TaggedDescendant(treeNode x, TagType tag) return (fast_is_ancestor(Par,x,y) ? y : NULLT); } */ - +/* treeNode XMLTree::SelectDescendant(treeNode x, TagIdSet *tags) { NULLT_IF (x ==NULLT || fast_isleaf(Par,x)); @@ -616,7 +618,7 @@ treeNode XMLTree::SelectDescendant(treeNode x, TagIdSet *tags) return min; } - +*/ // TaggedPrec(x,tag): returns the first node tagged tag with smaller preorder than x and not an // ancestor of x. Returns NULLT if there is none. @@ -707,7 +709,7 @@ treeNode XMLTree::SelectFollowingBelow(treeNode x, TagIdSet *tags, treeNode ance else return NULLT; } - +/* treeNode XMLTree::SelectFollowingBefore(treeNode x, TagIdSet *tags, treeNode ancestor_closing) { @@ -738,7 +740,7 @@ treeNode XMLTree::SelectFollowingBefore(treeNode x, TagIdSet *tags, treeNode anc else return NULLT; } - +*/ /* treeNode XMLTree::SelectFollowingBefore(treeNode x, TagIdSet *tags, treeNode closing) { @@ -899,17 +901,9 @@ bool XMLTree::IsOpen(treeNode x) { return fast_inspect(Par,x); } void XMLTree::Print(int fd,treeNode x, bool no_text){ - int newfd = dup(fd); - stream = fdopen(newfd,"wa"); - if (stream == 0){ - perror(NULL); - return; - }; - if (buffer == 0) buffer = new string(); - - FILE* fp = stream; + treeNode fin = fast_find_close(Par,x); treeNode n = x; TagType tag = Tag(n); @@ -920,92 +914,90 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ treeNode first_att = NULLT; if (first_att == NULLT) - first_idx = first_text; + first_idx = first_text; else if (first_text == NULLT) - first_idx = first_att; + first_idx = first_att; else - first_idx = min(first_att,first_text); + first_idx = min(first_att,first_text); uchar * current_text=NULL; + if (first_idx != NULLT) - current_text = GetText(MyText(first_idx)); + current_text = GetText(MyText(first_idx)); + size_t read = 0; std::vector st; - while (n <= fin){ - if (fast_inspect(Par,n)){ - if (tag == PCDATA_TAG_ID ) { - - if (no_text) - myfputs("<$/>",fp); - else{ - read = myfprintf((const char*) current_text, fp); - current_text += (read + 1); - }; - n+=2; // skip closing $ - tag = Tag(n); - - } - else { - myfputc('<',fp); - tagstr = (uchar*) GetTagNameByRef(tag); - myfputs((const char*) tagstr ,fp); - n++; - if (fast_inspect(Par,n)) { - st.push_back(tagstr); - tag = Tag(n); - if (tag == ATTRIBUTE_TAG_ID){ - n++; - if (no_text) myfputs("><@@>",fp); - while (fast_inspect(Par,n)){ - if (no_text) { - myfputc('<',fp); - myfputs((const char*) &(GetTagNameByRef(Tag(n))[3]),fp); - myfputc('>',fp); - myfputs("<$@/>',fp); - n+= 4; - } - else { - myfputc(' ',fp); - myfputs((const char*) &(GetTagNameByRef(Tag(n))[3]),fp); - n++; - myfputs("=\"",fp); - read = myfprintf((const char*) current_text,fp); - current_text += (read + 1); - myfputc('"',fp); - n+=3; - } - }; - if (no_text) - myfputs("",fp); - else myfputc('>',fp); - n++; - tag=Tag(n); - } + while (n <= fin){ + if (fast_inspect(Par,n)){ + if (tag == PCDATA_TAG_ID) { + + if (no_text) + _dputs("<$/>", fd); else { - myfputc('>',fp); + read = _dprintf((const char*) current_text, fd); + current_text += (read + 1); }; + n+=2; // skip closing $ + tag = Tag(n); + } - else {// tag - myfputs("/>",fp); + else { + _dputc('<',fd); + tagstr = (uchar*) GetTagNameByRef(tag); + _dputs((const char*) tagstr, fd); n++; - tag=Tag(n); - }; - }; - } - else - do { - myfputs("', fp); - st.pop_back(); - n++; - }while (!fast_inspect(Par,n) && !st.empty()); - tag=Tag(n); - }; - myfputc('\n',fp); - mybufferflush(fp); - //fflush(fp); - fclose(fp); + if (fast_inspect(Par,n)) { + st.push_back(tagstr); + tag = Tag(n); + if (tag == ATTRIBUTE_TAG_ID){ + n++; + if (no_text) _dputs("><@@>",fd); + + while (fast_inspect(Par,n)){ + if (no_text) { + _dputc('<', fd); + _dputs((const char*) &(GetTagNameByRef(Tag(n))[3]), fd); + _dputc('>', fd); + _dputs("<$@/>', fd); + n+= 4; + } else { + _dputc(' ', fd); + _dputs((const char*) &(GetTagNameByRef(Tag(n))[3]), fd); + n++; + _dputs("=\"", fd); + read = _dprintf((const char*) current_text, fd); + current_text += (read + 1); + _dputc('"', fd); + n+=3; + } + }; + if (no_text) _dputs("", fd); + else _dputc('>', fd); + n++; + tag=Tag(n); + + } else + _dputc('>', fd); + + } else {// tag + _dputs("/>", fd); + n++; + tag=Tag(n); + }; + }; + } + else + do { + _dputs("', fd); + st.pop_back(); + n++; + } while (!(fast_inspect(Par,n) || st.empty())); + tag = Tag(n); + }; + _dputc('\n', fd); + _flush(fd); } diff --git a/XMLTree.h b/XMLTree.h index 95ec450..5c010a5 100644 --- a/XMLTree.h +++ b/XMLTree.h @@ -1,678 +1,741 @@ -/****************************************************************************** - * Copyright (C) 2008 by Diego Arroyuelo * - * Interface for the in-memory XQuery/XPath engine * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU Lesser General Public License as published * - * by the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU Lesser General Public License for more details. * - * * - * You should have received a copy of the GNU Lesser General Public License * - * along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - ******************************************************************************/ - -#ifndef XMLTREE_H_ -#define XMLTREE_H_ - - -#include -#include -#include -#include "TextCollection/TextCollectionBuilder.h" - -#undef W -#undef WW -#undef Wminusone - -#include "bp.h" -#include -#include -#include -#include -using SXSI::TextCollection; -using SXSI::TextCollectionBuilder; - - -// this constant is used to efficiently compute the child operation in the tree -#define OPTD 10 - -#define NULLT -1 - -#define PERM_SAMPLE 10 - - -typedef int treeNode; -typedef int TagType; -typedef int DocID; - -typedef struct { - int min; - int max; -} range; - -// Encoding of the XML Document : -// The following TAGs and IDs are fixed, "" is the tag of the root. -// a TextNode is represented by a leaf <<$>>> The DocId in the TextCollection -// of that leaf is kept in a bit sequence. -// a TextNode below an attribute is likewise represented by a leaf <<@$>><> -// An element ... the representation is: -// <<@>> <<@>a1> <<$@>>DocID(v1)>a1> ... > .... -// Hence the attributes (if any) are always below the first child of their element, -// as the children of a fake node <@>. - - -#define DOCUMENT_OPEN_TAG "" -#define DOCUMENT_TAG_ID 0 -#define ATTRIBUTE_OPEN_TAG "<@>" -#define ATTRIBUTE_TAG_ID 1 -#define PCDATA_OPEN_TAG "<$>" -#define PCDATA_TAG_ID 2 -#define ATTRIBUTE_DATA_OPEN_TAG "<@$>" -#define ATTRIBUTE_DATA_TAG_ID 3 -#define CLOSING_TAG "" -#define CLOSING_TAG_ID 4 -#define DOCUMENT_CLOSE_TAG "/" -#define ATTRIBUTE_CLOSE_TAG "/<@>" -#define PCDATA_CLOSE_TAG "/<$>" -#define ATTRIBUTE_DATA_CLOSE_TAG "/<@$>" - - -typedef std::unordered_set TagIdSet; -typedef std::unordered_map TagIdMap; -typedef TagIdMap::const_iterator TagIdMapIT; - -#define REGISTER_TAG(v,h,t) do { (h)->insert(std::make_pair((t),(v)->size()));\ - (v)->push_back(t); } while (false) - -// returns NULLT if the test is true -#define NULLT_IF(x) do { if (x) return NULLT; } while (0) - -// Direct calls to sarray library - -static inline int fast_find_close(bp *b,int s) -{ - return fwd_excess(b,s,-1); -} - -static inline int fast_inspect(bp* Par,treeNode i) -{ - int j,l; - j = i >> logD; - l = i & (D-1); - return (Par->B[j] >> (D-1-l)) & 1; -} - -static bool fast_isleaf(bp* Par,treeNode x){ - return (fast_inspect(Par, x+1) == CP); -} - -static treeNode fast_first_child(bp *Par, treeNode x) -{ - x = x+1; - return (fast_inspect(Par,x) == OP) ? x : NULLT; -} - -inline static treeNode fast_next_sibling(bp* Par,treeNode x) -{ - treeNode y = fast_find_close(Par,x)+1; - return (fast_inspect(Par, y) == OP) ? y : NULLT; -} - -inline static bool fast_is_ancestor(bp * Par,treeNode x,treeNode y){ - return (x <= y) && ((x==0) || (y <= fast_find_close(Par,x))); -} - -// tag position -> tree node -static treeNode tagpos2node(int t) - { - return (treeNode) t; - } -// tree node -> tag position -static int node2tagpos(treeNode x) -{ - return (int)x; -} - - -class XMLTreeBuilder; - -class XMLTree { - - // Only the builder can access the constructor - friend class XMLTreeBuilder; - - private: - /** Balanced parentheses representation of the tree */ - bp *Par; - - /** Mapping from tag identifer to tag name */ - std::vector *TagName; - TagIdMap * tIdMap; - - /** Bit vector indicating with a 1 the positions of the non-empty texts. */ - static_bitsequence *EBVector; - - /** Tag sequence represented with a data structure for rank and select */ - static_sequence *Tags; - uint * tags_fix; - uint tags_blen, tags_len; - - /** The texts in the XML document */ - TextCollection *Text; - - // Allows to disable the TextCollection for benchmarkin purposes - bool disable_tc; - - FILE* stream; - int stream_fd; - std::string * buffer; - void myfputs(const char* s, FILE * fp){ - buffer->append(s); - if (buffer->size() >= 100000){ - fputs(buffer->c_str(),fp); - buffer->clear(); - }; - - } - void myfputc(const char c, FILE*fp){ - buffer->append(1,c); - if (buffer->size() >= 100000){ - fputs(buffer->c_str(),fp); - buffer->clear(); - }; - } - void mybufferflush(FILE* fp){ - fputs(buffer->c_str(), fp); - buffer->clear(); - } - - size_t myfprintf(const char* s, FILE * fp){ - if (s == NULL) - return 0; - size_t i = buffer->size(); - buffer->append(s); - size_t j = buffer->size(); - if (buffer->size() >= 100000){ - fputs(buffer->c_str(),fp); - buffer->clear(); - }; - return (j-i); - } - - void PrintNode(treeNode n, int fd); - /** Data structure constructors */ - XMLTree(){ buffer = 0;}; - - // non const pointer are freed by this method. - XMLTree( pb * const par, uint npar, std::vector * const TN, TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags, - TextCollection * const TC, bool dis_tc); - -public: - /** Data structure destructor */ - ~XMLTree(); - - /** root(): returns the tree root. */ - treeNode Root() { return 0; } - - /** Size() : Number of parenthesis */ - unsigned int Size(){ - return tags_len/2; - } - - - /** NumTags() : Number of distinct tags */ - unsigned int NumTags() { - return TagName->size(); - } - - int TagsBinaryLength(){ return tags_blen; }; - unsigned int TagStructLength(){ return uint_len(tags_blen,tags_len); }; - unsigned int * TagStruct() { return tags_fix; }; - - - /** SubtreeSize(x): the number of nodes (and attributes) in the subtree of - * node x. */ - int SubtreeSize(treeNode x); - - /** SubtreeTags(x,tag): the number of occurrences of tag within the subtree - * of node x. */ - int SubtreeTags(treeNode x, TagType tag); - - /** SubtreeElements(x) of element nodes in the subtree of x - */ - int SubtreeElements(treeNode x); - - /** IsLeaf(x): returns whether node x is leaf or not. In the succinct - * representation this is just a bit inspection. */ - - bool IsLeaf(treeNode x); - - /** IsAncestor(x,y): returns whether node x is ancestor of node y. */ - - bool IsAncestor(treeNode x, treeNode y); - - /** IsChild(x,y): returns whether node x is parent of node y. */ - bool IsChild(treeNode x, treeNode y); - - /** IsFirstChild(x): returns whether node x is the first child of its parent. */ - /* OCAML */ - bool IsFirstChild(treeNode x) { - return ((x != NULLT)&&(x==Root() || prev_sibling(Par,x) == (treeNode)-1)); - }; - - /** NumChildren(x): number of children of node x. Constant time with the - * data structure of Sadakane. */ - int NumChildren(treeNode x); - - /** ChildNumber(x): returns i if node x is the i-th children of its - * parent. */ - int ChildNumber(treeNode x); - - /** Depth(x): depth of node x, a simple binary rank on the parentheses - * sequence. */ - int Depth(treeNode x); - - /** Preorder(x): returns the preorder number of node x, just regarding tree - * nodes (and not texts). */ - int Preorder(treeNode x); - - /** Postorder(x): returns the postorder number of node x, just regarding - * tree nodes (and not texts). */ - int Postorder(treeNode x); - - - /** DocIds(x): returns the range (i.e., a pair of integers) of document - * identifiers that descend from node x. */ - range DocIds(treeNode x); - - /** Parent(x): returns the parent node of node x. */ - treeNode Parent(treeNode x) { - if (x == Root()) - return NULLT; - else - return parent(Par, x); - }; - /* Assumes x is neither 0 nor -1 */ - - /** Child(x,i): returns the i-th child of node x, assuming it exists. */ - treeNode Child(treeNode x, int i); - - - - /** LastChild(x): returns the last child of node x. */ - treeNode LastChild(treeNode x); - - - - /** PrevSibling(x): returns the previous sibling of node x, assuming it - * exists. */ - - treeNode PrevSibling(treeNode x); - - /** TaggedChild(x,tag): returns the first child of node x tagged tag, or - * NULLT if there is none. Because of the balanced-parentheses representation - * of the tree, this operation is not supported efficiently, just iterating - * among the children of node x until finding the desired child. */ - - - treeNode SelectChild(treeNode x, TagIdSet * tags); - - /** TaggedFollowingSibling(x,tag): returns the first sibling of node x tagged tag, or - * NULLT if there is none. */ - - treeNode SelectFollowingSibling(treeNode x, TagIdSet * tags); - - - - - treeNode SelectDescendant(treeNode x, TagIdSet * tags); - - /** TaggedPrec(x,tag): returns the first node tagged tag with smaller - * preorder than x and not an ancestor of x. Returns NULLT if there - * is none. */ - treeNode TaggedPreceding(treeNode x, TagType tag); - - /** TaggedFoll(x,tag): returns the first node tagged tag with larger - * preorder than x and not in the subtree of x. Returns NULLT if there - * is none. */ - treeNode TaggedFollowing(treeNode x, TagType tag); - - - - treeNode SelectFollowingBelow(treeNode x, TagIdSet * tags, treeNode ancestor); - - // treeNode TaggedFollowingBefore(treeNode x, TagType tag,treeNode closing); - - treeNode SelectFollowingBefore(treeNode x, TagIdSet * tags, treeNode closing); - - /** TaggedAncestor(x, tag): returns the closest ancestor of x tagged - * tag. Return NULLT is there is none. */ - treeNode TaggedAncestor(treeNode x, TagType tag); - - /** PrevText(x): returns the document identifier of the text to the left of - * node x, or NULLT if x is the root node. */ - DocID PrevText(treeNode x); - - /** NextText(x): returns the document identifier of the text to the right of - * node x, or NULLT if x is the root node. */ - DocID NextText(treeNode x); - - /** MyText(x): returns the document identifier of the text below node x, or - * NULLT if x is not a leaf node. */ - DocID MyText(treeNode x); - DocID MyTextUnsafe(treeNode x); - - /** TextXMLId(d): returns the preorder of document with identifier d in the - * tree consisting of all tree nodes and all text nodes. */ - int TextXMLId(DocID d); - - /** NodeXMLId(x): returns the preorder of node x in the tree consisting of - * all tree nodes and all text nodes. */ - int NodeXMLId(treeNode x); - - /** ParentNode(d): returns the parent node of document identifier d. */ - treeNode ParentNode(DocID d); - - treeNode PrevNode(DocID d); - - /** GetTagId(tagname): returns the tag identifier corresponding to a given - * tag name. Returns NULLT in case that the tag name does not exists. */ - TagType GetTagId(unsigned char *tagname); - - /** GetTagName(tagid): returns the tag name of a given tag identifier. - * Returns NULL in case that the tag identifier is not valid.*/ - unsigned char *GetTagName(TagType tagid); - - /** GetTagName(tagid): returns the tag name of a given tag identifier. - * The result is just a reference and should not be freed by the caller. - */ - const unsigned char *GetTagNameByRef(TagType tagid); - - /** RegisterTag adds a new tag to the tag collection this is needed - * if the query contains a tag which is not in the document, we need - * to give this new tag a fresh id and store it somewhere. A logical - * choice is here. - * We might want to use a hashtable instead of an array though. - */ - TagType RegisterTag(unsigned char *tagname); - - bool EmptyText(DocID i) { - return Text->EmptyText(i); - } - - /** Prefix(s): search for texts prefixed by string s. */ - TextCollection::document_result Prefix(uchar const *s) { - return Text->Prefix(s); - } - - /** Suffix(s): search for texts having string s as a suffix. */ - TextCollection::document_result Suffix(uchar const *s) { - return Text->Suffix(s); - } - - /** Equal(s): search for texts equal to string s. */ - TextCollection::document_result Equals(uchar const *s) { - return Text->Equal(s); - } - - /** Contains(s): search for texts containing string s. */ - TextCollection::document_result Contains(uchar const *s) { - return Text->Contains(s); - } - - /** LessThan(s): returns document identifiers for the texts that - * are lexicographically smaller than string s. */ - TextCollection::document_result LessThan(uchar const *s) { - return Text->LessThan(s); - } - - /** IsPrefix(x): returns true if there is a text prefixed by string s. */ - bool IsPrefix(uchar const *s) { - return Text->IsPrefix(s); - } - - /** IsSuffix(s): returns true if there is a text having string s as a - * suffix.*/ - bool IsSuffix(uchar const *s) { - return Text->IsSuffix(s); - } - - /** IsEqual(s): returns true if there is a text that equals given - * string s. */ - bool IsEqual(uchar const *s) { - return Text->IsEqual(s); - } - - /** IsContains(s): returns true if there is a text containing string s. */ - bool IsContains(uchar const *s) { - return Text->IsContains(s); - } - - /** IsLessThan(s): returns true if there is at least a text that is - * lexicographically smaller than string s. */ - bool IsLessThan(uchar const *s) { - return Text->IsLessThan(s); - } - - /** Count(s): Global counting */ - unsigned Count(uchar const *s) { - return Text->Count(s); - } - - /** CountPrefix(s): counting version of Prefix(s). */ - unsigned CountPrefix(uchar const *s) { - return Text->CountPrefix(s); - } - - /** CountSuffix(s): counting version of Suffix(s). */ - unsigned CountSuffix(uchar const *s) { - return Text->CountSuffix(s); - } - - /** CountEqual(s): counting version of Equal(s). */ - unsigned CountEqual(uchar const *s) { - return Text->CountEqual(s); - } - - /** CountContains(s): counting version of Contains(s). */ - unsigned CountContains(uchar const *s) { - return Text->CountContains(s); - } - - /** CountLessThan(s): counting version of LessThan(s). */ - unsigned CountLessThan(uchar const *s) { - return Text->CountLessThan(s); - } - - /** GetText(d): returns the text corresponding to document with - * id d. */ - uchar* GetText(DocID d) { - - uchar * s = Text->GetText(d); - return (s[0] == 1 ? (s+1) : s); - } - - /** GetText(i, j): returns the texts corresponding to documents with - * ids i, i+1, ..., j. Texts are separated by '\0' character. */ - // uchar* GetText(DocID i, DocID j) { - // uchar * s = Text->GetText(i, j); - // return (s[0] == 1 ? (uchar*)"" : s); - //} - - TextCollection *getTextCollection() { - return Text; - } - - /** Save: saves XML tree data structure to file. */ - void Save(int fd, char *filename); - - /** Load: loads XML tree data structure from file. sample_rate_text - * indicates the sample rate for the text search data structure. */ - static XMLTree *Load(int fd, char *filename, bool load_tc, int sample_factor); - - void insertTag(TagType tag, uint position); - - void print_stats(); - - - /** Parenthesis functions */ - treeNode Closing(treeNode x); - - bool IsOpen(treeNode x); - - - /** Print procedure */ - void Print(int fd,treeNode x, bool no_text); - void Print(int fd,treeNode x) { Print(fd,x,false); } - - // The following are inlined here for speed - /** Tag(x): returns the tag identifier of node x. */ - - inline TagType Tag(treeNode x) const throw () { - if (tags_blen == 8) - return (TagType) (((uchar*)tags_fix)[(int) x]); - else - return get_field(tags_fix, tags_blen, x); - /* - { - size_t idxlen = x * tags_blen; - size_t j = idxlen % W; - size_t i = idxlen / W; - size_t offset = W - tags_blen; - size_t offset2 = offset - j; - size_t w = tags_fix[i]; - return (offset2 >= 0) - ? ( w << offset2 ) >> offset - : ( w >> j) | (tags_fix[i+1] << (W+offset2)) >> offset; - }; */ - - } - - /** FirstChild(x): returns the first child of node x, or NULLT if the node is a leaf - */ - treeNode FirstChild(treeNode x) { - NULLT_IF(x==NULLT); - return fast_first_child(Par, x); - }; - - - /** FirstElement(x): returns the first non text, non attribute child of node x, or NULLT - * if none. - */ - treeNode FirstElement(treeNode x){ - { - NULLT_IF(x==NULLT); - x = fast_first_child(Par, x); - NULLT_IF(x == NULLT); - switch (Tag(x)){ - - case PCDATA_TAG_ID: - x = x+2; - return (fast_inspect(Par,x)==OP)? x : NULLT; - - case ATTRIBUTE_TAG_ID: - x = fast_next_sibling(Par,x); - if (x != NULLT && Tag(x) == PCDATA_TAG_ID){ - x = x+2; - return (fast_inspect(Par,x)==OP)? x : NULLT; - } - else return x; - default: - return x; - } - } - }; - - /** NextSibling(x): returns the next sibling of node x, or NULLT if none - * exists. */ - - treeNode NextSibling(treeNode x) { - NULLT_IF (x <= 0); - return fast_next_sibling(Par, x); - }; - - /** NextElement(x): returns the first non text, non attribute sibling of node x, or NULLT - * if none. - */ - treeNode NextElement(treeNode x) - { - NULLT_IF(x <= 0); - x = fast_next_sibling(Par, x); - NULLT_IF(x == NULLT); - if (Tag(x) == PCDATA_TAG_ID){ - x = x+2; - return (fast_inspect(Par,x)==OP)? x : NULLT; - } - else return x; - }; - /** TaggedDesc(x,tag): returns the first node tagged tag with larger - * preorder than x and within the subtree of x. Returns NULT if there - * is none. */ - inline treeNode TaggedDescendant(treeNode x, TagType tag) - { - - int s = (int) Tags->select_next(tag,node2tagpos(x)); - NULLT_IF (s == -1); - - treeNode y = tagpos2node(s); // transforms the tag position into a node position - - return (fast_is_ancestor(Par,x,y) ? y : NULLT); - }; - - inline treeNode TaggedFollowingBelow(treeNode x, TagType tag, treeNode ancestor) - { - treeNode close = fast_find_close(Par, x); - treeNode s = tagpos2node(Tags->select_next(tag, close)); - - if (ancestor == Root() || s == NULLT || s < fast_find_close(Par,ancestor)) return s; - else return NULLT; - }; - - inline treeNode TaggedFollowingBefore(treeNode x, TagType tag, treeNode ancestor_closing) - { - treeNode close = fast_find_close(Par, x); - treeNode s = tagpos2node(Tags->select_next(tag, close)); - - if (ancestor_closing == Root() || s == NULLT || s < ancestor_closing) return s; - else return NULLT; - }; - -// TaggedSibling(x,tag): returns the first sibling of node x tagged tag, or NULLT if there is none. -treeNode TaggedFollowingSibling(treeNode x, TagType tag) -{ - NULLT_IF(x==NULLT); - treeNode sibling = x; - TagType ctag; - while ((sibling = fast_next_sibling(Par, sibling)) != NULLT) { - ctag = Tag(sibling); - if (ctag == tag) return sibling; - } - return NULLT; -}; - -treeNode TaggedChild(treeNode x, TagType tag) -{ - - NULLT_IF(x==NULLT || fast_isleaf(Par,x)); - treeNode child; - child = fast_first_child(Par, x); - - if (Tag(child) == tag) - return child; - else - return TaggedFollowingSibling(child, tag); -}; - - -}; - - -#endif - +/****************************************************************************** + * Copyright (C) 2008 by Diego Arroyuelo * + * Interface for the in-memory XQuery/XPath engine * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU Lesser General Public License as published * + * by the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU Lesser General Public License for more details. * + * * + * You should have received a copy of the GNU Lesser General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ******************************************************************************/ + +#ifndef XMLTREE_H_ +#define XMLTREE_H_ + + +#include +#include +#include +#include "TextCollection/TextCollectionBuilder.h" + +#undef W +#undef WW +#undef Wminusone + +#include "bp.h" +#include +#include +#include +#include +using SXSI::TextCollection; +using SXSI::TextCollectionBuilder; + + +// this constant is used to efficiently compute the child operation in the tree +#define OPTD 10 + +#define NULLT -1 + +#define PERM_SAMPLE 10 + + +typedef int treeNode; +typedef int TagType; +typedef int DocID; + +typedef struct { + int min; + int max; +} range; + +// Encoding of the XML Document : +// The following TAGs and IDs are fixed, "" is the tag of the root. +// a TextNode is represented by a leaf <<$>>> The DocId in the TextCollection +// of that leaf is kept in a bit sequence. +// a TextNode below an attribute is likewise represented by a leaf <<@$>><> +// An element ... the representation is: +// <<@>> <<@>a1> <<$@>>DocID(v1)>a1> ... > .... +// Hence the attributes (if any) are always below the first child of their element, +// as the children of a fake node <@>. + + +#define DOCUMENT_OPEN_TAG "" +#define DOCUMENT_TAG_ID 0 +#define ATTRIBUTE_OPEN_TAG "<@>" +#define ATTRIBUTE_TAG_ID 1 +#define PCDATA_OPEN_TAG "<$>" +#define PCDATA_TAG_ID 2 +#define ATTRIBUTE_DATA_OPEN_TAG "<@$>" +#define ATTRIBUTE_DATA_TAG_ID 3 +#define CLOSING_TAG "" +#define CLOSING_TAG_ID 4 +#define DOCUMENT_CLOSE_TAG "/" +#define ATTRIBUTE_CLOSE_TAG "/<@>" +#define PCDATA_CLOSE_TAG "/<$>" +#define ATTRIBUTE_DATA_CLOSE_TAG "/<@$>" + + +typedef std::unordered_set TagIdSet; +typedef std::unordered_map TagIdMap; +typedef TagIdMap::const_iterator TagIdMapIT; + +#define REGISTER_TAG(v,h,t) do { (h)->insert(std::make_pair((t),(v)->size()));\ + (v)->push_back(t); } while (false) + +// returns NULLT if the test is true +#define NULLT_IF(x) do { if (x) return NULLT; } while (0) + +// Direct calls to sarray library + +static inline int fast_find_close(bp *b,int s) +{ + return fwd_excess(b,s,-1); +} + +static inline int fast_inspect(bp* Par,treeNode i) +{ + int j,l; + j = i >> logD; + l = i & (D-1); + return (Par->B[j] >> (D-1-l)) & 1; +} + +static bool fast_isleaf(bp* Par,treeNode x){ + return (fast_inspect(Par, x+1) == CP); +} + +static treeNode fast_first_child(bp *Par, treeNode x) +{ + x = x+1; + return (fast_inspect(Par,x) == OP) ? x : NULLT; +} + +inline static treeNode fast_next_sibling(bp* Par,treeNode x) +{ + treeNode y = fast_find_close(Par,x)+1; + return (fast_inspect(Par, y) == OP) ? y : NULLT; +} + +inline static bool fast_is_ancestor(bp * Par,treeNode x,treeNode y){ + return (x <= y) && ((x==0) || (y <= fast_find_close(Par,x))); +} + +// tag position -> tree node +static treeNode tagpos2node(int t) + { + return (treeNode) t; + } +// tree node -> tag position +static int node2tagpos(treeNode x) +{ + return (int)x; +} + + +class XMLTreeBuilder; + +class XMLTree { + + // Only the builder can access the constructor + friend class XMLTreeBuilder; + + private: + /** Balanced parentheses representation of the tree */ + bp *Par; + + /** Mapping from tag identifer to tag name */ + std::vector *TagName; + TagIdMap * tIdMap; + + /** Bit vector indicating with a 1 the positions of the non-empty texts. */ + static_bitsequence *EBVector; + + /** Tag sequence represented with a data structure for rank and select */ + static_sequence *Tags; + uint * tags_fix; + uint tags_blen, tags_len; + + /** The texts in the XML document */ + TextCollection *Text; + + // Allows to disable the TextCollection for benchmarkin purposes + bool disable_tc; + + FILE* stream; + int stream_fd; + std::string * buffer; + + void _flush(int fd){ + size_t size = buffer->size(); + size_t written = write(fd, buffer->data(), size); + if (written != size) + throw "Cannot flush buffer"; + buffer->clear(); + } + + void _dputs(const char* s, int fd){ + buffer->append(s); + if (buffer->size() >= 131072) _flush(fd); + + } + + void _dputc(const char c, int fd){ + buffer->append(1,c); + if (buffer->size() >= 131072) _flush(fd); + } + + size_t _dprintf(const char* s, int fd){ + if (s == NULL) return 0; + size_t i = 0; + while (1) { + switch (s[i]) { + case '&': + buffer->append("&"); + break; + case '\'': + buffer->append("'"); + break; + case '"': + buffer->append("""); + break; + case '<': + buffer->append("<"); + break; + case '>': + buffer->append(">"); + break; + case 0: + return i; + default: + buffer->append(1, s[i]); + + }; + if (buffer->size() >= 131072) _flush(fd); + ++i; + }; + } + + void PrintNode(treeNode n, int fd); + /** Data structure constructors */ + XMLTree(){ buffer = 0;}; + + // non const pointer are freed by this method. + XMLTree( pb * const par, uint npar, std::vector * const TN, TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags, + TextCollection * const TC, bool dis_tc); + +public: + /** Data structure destructor */ + ~XMLTree(); + + /** root(): returns the tree root. */ + treeNode Root() { return 0; } + + /** Size() : Number of parenthesis */ + unsigned int Size(){ + return tags_len/2; + } + + + /** NumTags() : Number of distinct tags */ + unsigned int NumTags() { + return TagName->size(); + } + + int TagsBinaryLength(){ return tags_blen; }; + unsigned int TagStructLength(){ return uint_len(tags_blen,tags_len); }; + unsigned int * TagStruct() { return tags_fix; }; + + + /** SubtreeSize(x): the number of nodes (and attributes) in the subtree of + * node x. */ + int SubtreeSize(treeNode x) { return subtree_size(Par, x); } + + /** SubtreeTags(x,tag): the number of occurrences of tag within the subtree + * of node x. */ + int SubtreeTags(treeNode x, TagType tag){ + //int s = x + 2*subtree_size(Par, x) - 1; + treeNode y = fast_find_close(Par, x); + + if (y - x < 10) { + int count = 0; + for(int i = x; i < y; i++) + count += (Tag(i) == tag); + return count; + } + else + return (Tags->rank(tag, y) - Tags->rank(tag, x)); + }; + + /** SubtreeElements(x) of element nodes in the subtree of x + */ + int SubtreeElements(treeNode x); + + /** IsLeaf(x): returns whether node x is leaf or not. In the succinct + * representation this is just a bit inspection. */ + + bool IsLeaf(treeNode x); + + /** IsAncestor(x,y): returns whether node x is ancestor of node y. */ + + bool IsAncestor(treeNode x, treeNode y); + + /** IsChild(x,y): returns whether node x is parent of node y. */ + bool IsChild(treeNode x, treeNode y); + + /** IsFirstChild(x): returns whether node x is the first child of its parent. */ + /* OCAML */ + bool IsFirstChild(treeNode x) { + return ((x != NULLT)&&(x==Root() || prev_sibling(Par,x) == (treeNode)-1)); + }; + + /** NumChildren(x): number of children of node x. Constant time with the + * data structure of Sadakane. */ + int NumChildren(treeNode x); + + /** ChildNumber(x): returns i if node x is the i-th children of its + * parent. */ + int ChildNumber(treeNode x); + + /** Depth(x): depth of node x, a simple binary rank on the parentheses + * sequence. */ + int Depth(treeNode x); + + /** Preorder(x): returns the preorder number of node x, just regarding tree + * nodes (and not texts). */ + int Preorder(treeNode x); + + /** Postorder(x): returns the postorder number of node x, just regarding + * tree nodes (and not texts). */ + int Postorder(treeNode x); + + + /** DocIds(x): returns the range (i.e., a pair of integers) of document + * identifiers that descend from node x. */ + range DocIds(treeNode x); + + /** Parent(x): returns the parent node of node x. */ + treeNode Parent(treeNode x) { + if (x == Root()) + return NULLT; + else + return parent(Par, x); + }; + /* Assumes x is neither 0 nor -1 */ + + /** Child(x,i): returns the i-th child of node x, assuming it exists. */ + treeNode Child(treeNode x, int i); + + + + /** LastChild(x): returns the last child of node x. */ + treeNode LastChild(treeNode x); + + + + /** PrevSibling(x): returns the previous sibling of node x, assuming it + * exists. */ + + treeNode PrevSibling(treeNode x); + + /** TaggedChild(x,tag): returns the first child of node x tagged tag, or + * NULLT if there is none. Because of the balanced-parentheses representation + * of the tree, this operation is not supported efficiently, just iterating + * among the children of node x until finding the desired child. */ + + + treeNode SelectChild(treeNode x, TagIdSet * tags); + + /** TaggedFollowingSibling(x,tag): returns the first sibling of node x tagged tag, or + * NULLT if there is none. */ + + treeNode SelectFollowingSibling(treeNode x, TagIdSet * tags); + + + + + treeNode SelectDescendant(treeNode x, TagIdSet * tags) { + NULLT_IF (x == NULLT); + treeNode fc = x+1; + if (fast_inspect(Par, fc) == CP) return NULLT; + treeNode min = NULLT; + treeNode aux; + + TagIdSet::const_iterator tagit; + for (tagit = tags->begin(); tagit != tags->end(); ++tagit) { + aux = TaggedDescendant(x, (TagType) *tagit); + if (((unsigned int) aux) < ((unsigned int) min)) min = aux; + }; + return min; + } + + /** TaggedPrec(x,tag): returns the first node tagged tag with smaller + * preorder than x and not an ancestor of x. Returns NULLT if there + * is none. */ + treeNode TaggedPreceding(treeNode x, TagType tag); + + /** TaggedFoll(x,tag): returns the first node tagged tag with larger + * preorder than x and not in the subtree of x. Returns NULLT if there + * is none. */ + treeNode TaggedFollowing(treeNode x, TagType tag); + + + + treeNode SelectFollowingBelow(treeNode x, TagIdSet * tags, treeNode ancestor); + + // treeNode TaggedFollowingBefore(treeNode x, TagType tag,treeNode closing); + + treeNode SelectFollowingBefore(treeNode x, TagIdSet * tags, treeNode ancestor_closing) + { + + NULLT_IF(x <= 0); + + treeNode close = fast_find_close(Par,x); + + + treeNode min = NULLT; + treeNode aux; + + + TagIdSet::const_iterator tagit; + for (tagit = tags->begin(); tagit != tags->end(); ++tagit) { + + aux = tagpos2node(Tags->select_next(*tagit, close)); + + if (((unsigned int) aux) < ((unsigned int) min)) min = aux; + }; + + + return (min < ancestor_closing) ? min : NULLT; + + } + + /** TaggedAncestor(x, tag): returns the closest ancestor of x tagged + * tag. Return NULLT is there is none. */ + treeNode TaggedAncestor(treeNode x, TagType tag); + + /** PrevText(x): returns the document identifier of the text to the left of + * node x, or NULLT if x is the root node. */ + DocID PrevText(treeNode x); + + /** NextText(x): returns the document identifier of the text to the right of + * node x, or NULLT if x is the root node. */ + DocID NextText(treeNode x); + + /** MyText(x): returns the document identifier of the text below node x, or + * NULLT if x is not a leaf node. */ + DocID MyText(treeNode x); + DocID MyTextUnsafe(treeNode x); + + /** TextXMLId(d): returns the preorder of document with identifier d in the + * tree consisting of all tree nodes and all text nodes. */ + int TextXMLId(DocID d); + + /** NodeXMLId(x): returns the preorder of node x in the tree consisting of + * all tree nodes and all text nodes. */ + int NodeXMLId(treeNode x); + + /** ParentNode(d): returns the parent node of document identifier d. */ + treeNode ParentNode(DocID d); + + treeNode PrevNode(DocID d); + + /** GetTagId(tagname): returns the tag identifier corresponding to a given + * tag name. Returns NULLT in case that the tag name does not exists. */ + TagType GetTagId(unsigned char *tagname); + + /** GetTagName(tagid): returns the tag name of a given tag identifier. + * Returns NULL in case that the tag identifier is not valid.*/ + unsigned char *GetTagName(TagType tagid); + + /** GetTagName(tagid): returns the tag name of a given tag identifier. + * The result is just a reference and should not be freed by the caller. + */ + const unsigned char *GetTagNameByRef(TagType tagid); + + /** RegisterTag adds a new tag to the tag collection this is needed + * if the query contains a tag which is not in the document, we need + * to give this new tag a fresh id and store it somewhere. A logical + * choice is here. + * We might want to use a hashtable instead of an array though. + */ + TagType RegisterTag(unsigned char *tagname); + + bool EmptyText(DocID i) { + return Text->EmptyText(i); + } + + /** Prefix(s): search for texts prefixed by string s. */ + TextCollection::document_result Prefix(uchar const *s) { + return Text->Prefix(s); + } + + /** Suffix(s): search for texts having string s as a suffix. */ + TextCollection::document_result Suffix(uchar const *s) { + return Text->Suffix(s); + } + + /** Equal(s): search for texts equal to string s. */ + TextCollection::document_result Equals(uchar const *s) { + return Text->Equal(s); + } + + /** Contains(s): search for texts containing string s. */ + TextCollection::document_result Contains(uchar const *s) { + return Text->Contains(s); + } + + /** LessThan(s): returns document identifiers for the texts that + * are lexicographically smaller than string s. */ + TextCollection::document_result LessThan(uchar const *s) { + return Text->LessThan(s); + } + + /** IsPrefix(x): returns true if there is a text prefixed by string s. */ + bool IsPrefix(uchar const *s) { + return Text->IsPrefix(s); + } + + /** IsSuffix(s): returns true if there is a text having string s as a + * suffix.*/ + bool IsSuffix(uchar const *s) { + return Text->IsSuffix(s); + } + + /** IsEqual(s): returns true if there is a text that equals given + * string s. */ + bool IsEqual(uchar const *s) { + return Text->IsEqual(s); + } + + /** IsContains(s): returns true if there is a text containing string s. */ + bool IsContains(uchar const *s) { + return Text->IsContains(s); + } + + /** IsLessThan(s): returns true if there is at least a text that is + * lexicographically smaller than string s. */ + bool IsLessThan(uchar const *s) { + return Text->IsLessThan(s); + } + + /** Count(s): Global counting */ + unsigned Count(uchar const *s) { + return Text->Count(s); + } + + /** CountPrefix(s): counting version of Prefix(s). */ + unsigned CountPrefix(uchar const *s) { + return Text->CountPrefix(s); + } + + /** CountSuffix(s): counting version of Suffix(s). */ + unsigned CountSuffix(uchar const *s) { + return Text->CountSuffix(s); + } + + /** CountEqual(s): counting version of Equal(s). */ + unsigned CountEqual(uchar const *s) { + return Text->CountEqual(s); + } + + /** CountContains(s): counting version of Contains(s). */ + unsigned CountContains(uchar const *s) { + return Text->CountContains(s); + } + + /** CountLessThan(s): counting version of LessThan(s). */ + unsigned CountLessThan(uchar const *s) { + return Text->CountLessThan(s); + } + + /** GetText(d): returns the text corresponding to document with + * id d. */ + uchar* GetText(DocID d) { + + uchar * s = Text->GetText(d); + return (s[0] == 1 ? (s+1) : s); + } + + /** GetText(i, j): returns the texts corresponding to documents with + * ids i, i+1, ..., j. Texts are separated by '\0' character. */ + // uchar* GetText(DocID i, DocID j) { + // uchar * s = Text->GetText(i, j); + // return (s[0] == 1 ? (uchar*)"" : s); + //} + + TextCollection *getTextCollection() { + return Text; + } + + /** Save: saves XML tree data structure to file. */ + void Save(int fd, char *filename); + + /** Load: loads XML tree data structure from file. sample_rate_text + * indicates the sample rate for the text search data structure. */ + static XMLTree *Load(int fd, char *filename, bool load_tc, int sample_factor); + + void insertTag(TagType tag, uint position); + + void print_stats(); + + + /** Parenthesis functions */ + treeNode Closing(treeNode x); + + bool IsOpen(treeNode x); + + + /** Print procedure */ + void Print(int fd,treeNode x, bool no_text); + void Print(int fd,treeNode x) { Print(fd,x,false); } + + // The following are inlined here for speed + /** Tag(x): returns the tag identifier of node x. */ + + inline TagType Tag(treeNode x) const throw () { + if (tags_blen == 8) + return (TagType) (((uchar*)tags_fix)[(int) x]); + else + return get_field(tags_fix, tags_blen, x); + /* + { + size_t idxlen = x * tags_blen; + size_t j = idxlen % W; + size_t i = idxlen / W; + size_t offset = W - tags_blen; + size_t offset2 = offset - j; + size_t w = tags_fix[i]; + return (offset2 >= 0) + ? ( w << offset2 ) >> offset + : ( w >> j) | (tags_fix[i+1] << (W+offset2)) >> offset; + }; */ + + } + + /** FirstChild(x): returns the first child of node x, or NULLT if the node is a leaf + */ + treeNode FirstChild(treeNode x) { + NULLT_IF(x==NULLT); + return fast_first_child(Par, x); + }; + + + /** FirstElement(x): returns the first non text, non attribute child of node x, or NULLT + * if none. + */ + treeNode FirstElement(treeNode node){ + { + NULLT_IF(node == NULLT); + treeNode x = fast_first_child(Par, node); + NULLT_IF(x == NULLT); + switch (Tag(x)){ + + case ATTRIBUTE_TAG_ID: + x = fast_next_sibling(Par,x); + if (x == NULLT || Tag(x) != PCDATA_TAG_ID) return x; + + case PCDATA_TAG_ID: + x = x+2; + return (fast_inspect(Par,x)==OP)? x : NULLT; + + default: + return x; + } + } + }; + + /** NextSibling(x): returns the next sibling of node x, or NULLT if none + * exists. */ + + treeNode NextSibling(treeNode x) { + NULLT_IF (x <= 0); + return fast_next_sibling(Par, x); + }; + + /** NextElement(x): returns the first non text, non attribute sibling of node x, or NULLT + * if none. + */ + treeNode NextElement(treeNode x) + { + NULLT_IF(x <= 0); + x = fast_next_sibling(Par, x); + NULLT_IF(x == NULLT); + if (Tag(x) == PCDATA_TAG_ID){ + int y = x+2; + return (fast_inspect(Par, y) == OP) ? y : NULLT; + } + else return x; + }; + /** TaggedDesc(x,tag): returns the first node tagged tag with larger + * preorder than x and within the subtree of x. Returns NULT if there + * is none. */ + inline treeNode TaggedDescendant(treeNode x, TagType tag) + { + + int s = (int) Tags->select_next(tag,node2tagpos(x)); + NULLT_IF (s == -1); + + treeNode y = tagpos2node(s); // transforms the tag position into a node position + + return (fast_is_ancestor(Par,x,y) ? y : NULLT); + }; + + inline treeNode TaggedFollowingBelow(treeNode x, TagType tag, treeNode ancestor) + { + treeNode close = fast_find_close(Par, x); + treeNode s = tagpos2node(Tags->select_next(tag, close)); + + if (ancestor == Root() || s == NULLT || s < fast_find_close(Par,ancestor)) return s; + else return NULLT; + }; + + inline treeNode TaggedFollowingBefore(treeNode x, TagType tag, treeNode ancestor_closing) + { + treeNode close = fast_find_close(Par, x); + treeNode s = tagpos2node(Tags->select_next(tag, close)); + + if (ancestor_closing == Root() || s == NULLT || s < ancestor_closing) return s; + else return NULLT; + }; + +// TaggedSibling(x,tag): returns the first sibling of node x tagged tag, or NULLT if there is none. +treeNode TaggedFollowingSibling(treeNode x, TagType tag) +{ + NULLT_IF(x==NULLT); + treeNode sibling = x; + TagType ctag; + while ((sibling = fast_next_sibling(Par, sibling)) != NULLT) { + ctag = Tag(sibling); + if (ctag == tag) return sibling; + } + return NULLT; +}; + +treeNode TaggedChild(treeNode x, TagType tag) +{ + + NULLT_IF(x==NULLT || fast_isleaf(Par,x)); + treeNode child; + child = fast_first_child(Par, x); + +if (Tag(child) == tag) + return child; + else + return TaggedFollowingSibling(child, tag); +}; + + +}; + + +#endif + diff --git a/bpcore.c b/bpcore.c index acb37bf..df39e2c 100644 --- a/bpcore.c +++ b/bpcore.c @@ -1,11 +1,13 @@ #include #include #include "bp.h" + #ifndef min - #define min(x,y) ((x)<(y)?(x):(y)) +#define min(x,y) ((x)<(y)?(x):(y)) #endif + #ifndef max - #define max(x,y) ((x)>(y)?(x):(y)) +#define max(x,y) ((x)>(y)?(x):(y)) #endif #define NOTFOUND -2 @@ -177,59 +179,42 @@ int fwd_excess(bp *b,int s, int rel) n = b->n; B = b->B; i = s+1; -#if 1 + d = search_SB_r(b,i,rel); if (d >= NOTFOUND) return d; - i = min((SBid(i) + 1) << logSB,n); + i = min((SBid(i) + 1) << logSB, n); td = depth(b,s) + rel; d = search_MB_r(b,i,td); if (d >= NOTFOUND) return d; -#else - if (i != SBfirst(i)) { - d = search_SB_r(b,i,rel); - if (d >= NOTFOUND) return d; - } - - td = depth(b,s) + rel; - - i = SBid(i+SB-1) << logSB; - - if (i != MBfirst(i)) { - d = search_MB_r(b,i,td); - if (d >= NOTFOUND) return d; - } -#endif m_ofs = b->m_ofs; + i = MBid(s) + m_ofs; + while (i > 0) { if ((i&1) == 0) { i++; m = b->mm[i]; M = b->mM[i]; - if (m <= td && td <= M) break; - } - i >>= 1; - } - if (i == 0) return NOTFOUND; - while (i < m_ofs) { - i <<= 1; - m = b->mm[i]; - M = b->mM[i]; - if (!(m <= td && td <= M)) i++; - } - i -= m_ofs; - i <<= logMB; + if (m <= td && td <= M) { + while (i < m_ofs) { + i <<= 1; + m = b->mm[i]; + M = b->mM[i]; + if (!(m <= td && td <= M)) i++; + } + i -= m_ofs; + i <<= logMB; - d = search_MB_r(b,i,td); - if (d >= NOTFOUND) return d; - - // unexpected (bug) - printf("fwd_excess: ???\n"); - return -99; + return search_MB_r(b,i,td); + }; + } + i >>= 1; + } + return NOTFOUND; }