XMLTree.h

   1 /******************************************************************************\r
   2  *   Copyright (C) 2008 by Diego Arroyuelo                                    *\r
   3  *   Interface for the in-memory XQuery/XPath engine                          *\r
   4  *                                                                            *\r
   5  *   This program is free software; you can redistribute it and/or modify     *\r
   6  *   it under the terms of the GNU Lesser General Public License as published *\r
   7  *   by the Free Software Foundation; either version 2 of the License, or     *\r
   8  *   (at your option) any later version.                                      *\r
   9  *                                                                            *\r
  10  *   This program is distributed in the hope that it will be useful,          *\r
  11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of           *\r
  12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            *\r
  13  *   GNU Lesser General Public License for more details.                      *\r
  14  *                                                                            *\r
  15  *   You should have received a copy of the GNU Lesser General Public License *\r
  16  *   along with this program; if not, write to the                            *\r
  17  *   Free Software Foundation, Inc.,                                          *\r
  18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.                *\r
  19  ******************************************************************************/\r
  20 \r
  21 #ifndef XMLTREE_H_\r
  22 #define XMLTREE_H_\r
  23 \r
  24 \r
  25 #include <unordered_set>\r
  26 #include <unordered_map>\r
  27 #include <sstream>\r
  28 #include "TextCollection/TextCollectionBuilder.h"\r
  29 \r
  30 #undef W\r
  31 #undef WW\r
  32 #undef Wminusone\r
  33 \r
  34 #include "bp.h"\r
  35 #include <libcds/includes/basics.h>\r
  36 #include <static_bitsequence.h>\r
  37 #include <alphabet_mapper.h>\r
  38 #include <static_sequence.h>\r
  39 using SXSI::TextCollection;\r
  40 using SXSI::TextCollectionBuilder;\r
  41 \r
  42 \r
  43 // this constant is used to efficiently compute the child operation in the tree\r
  44 #define OPTD 10\r
  45 \r
  46 #define NULLT -1\r
  47 \r
  48 #define PERM_SAMPLE 10\r
  49 \r
  50 \r
  51 typedef int treeNode;\r
  52 typedef int TagType; \r
  53 typedef int DocID;  \r
  54 \r
  55 typedef struct {\r
  56    int min;\r
  57    int max;\r
  58 } range;\r
  59 \r
  60 // Encoding of the XML Document :\r
  61 // The following TAGs and IDs are fixed, "" is the tag of the root.\r
  62 // a TextNode is represented by a leaf <<$>></<$>> The DocId in the TextCollection\r
  63 // of that leaf is kept in a bit sequence.\r
  64 // a TextNode below an attribute is likewise represented by a leaf <<@$>><</@$>>\r
  65 // An element <e a1="v1" a2="v2" ... an="vn" > ...</e> the representation is:\r
  66 // <e><<@>> <<@>a1> <<$@>>DocID(v1)</<$@>></<@>a1> ... </<@>> .... </e>\r
  67 // Hence the attributes (if any) are always below the first child of their element,\r
  68 // as the children of a fake node <@>.\r
  69 \r
  70 \r
  71 #define DOCUMENT_OPEN_TAG ""\r
  72 #define DOCUMENT_TAG_ID 0\r
  73 #define ATTRIBUTE_OPEN_TAG "<@>"\r
  74 #define ATTRIBUTE_TAG_ID 1\r
  75 #define PCDATA_OPEN_TAG "<$>"\r
  76 #define PCDATA_TAG_ID 2\r
  77 #define ATTRIBUTE_DATA_OPEN_TAG "<@$>"\r
  78 #define ATTRIBUTE_DATA_TAG_ID 3\r
  79 #define CLOSING_TAG   "</>"\r
  80 #define CLOSING_TAG_ID 4\r
  81 #define DOCUMENT_CLOSE_TAG "/"\r
  82 #define ATTRIBUTE_CLOSE_TAG "/<@>"\r
  83 #define PCDATA_CLOSE_TAG "/<$>"\r
  84 #define ATTRIBUTE_DATA_CLOSE_TAG "/<@$>"\r
  85 \r
  86 \r
  87 typedef std::unordered_set<int> TagIdSet;\r
  88 typedef std::unordered_map<std::string,int> TagIdMap;\r
  89 typedef TagIdMap::const_iterator TagIdMapIT;\r
  90 \r
  91 #define REGISTER_TAG(v,h,t) do { (h)->insert(std::make_pair((t),(v)->size()));\\r
  92     (v)->push_back(t); } while (false)\r
  93 \r
  94 // returns NULLT if the test is true\r
  95 #define NULLT_IF(x)  do { if (x) return NULLT; } while (0)\r
  96 \r
  97 // Direct calls to sarray library\r
  98 \r
  99 static inline int fast_find_close(bp *b,int s)\r
 100 {\r
 101   return fwd_excess(b,s,-1);\r
 102 }\r
 103 \r
 104 static inline int fast_inspect(bp* Par,treeNode i)\r
 105 {\r
 106   int j,l;\r
 107   j = i >> logD;\r
 108   l = i & (D-1);\r
 109   return (Par->B[j] >> (D-1-l)) & 1;\r
 110 }\r
 111 \r
 112 static treeNode fast_first_child(bp *Par, treeNode x)\r
 113 {\r
 114   x = x+1;\r
 115   return (fast_inspect(Par,x) == OP) ? x : NULLT;\r
 116 }\r
 117 \r
 118 inline static treeNode fast_next_sibling(bp* Par,treeNode x)\r
 119 {\r
 120   treeNode y = fast_find_close(Par,x)+1;\r
 121   return (fast_inspect(Par, y) == OP) ? y : NULLT;\r
 122 }\r
 123 \r
 124 inline static bool fast_is_ancestor(bp * Par,treeNode x,treeNode y){\r
 125   return (x <= y) && ((x==0) || (y <= fast_find_close(Par,x)));\r
 126 }\r
 127 \r
 128 // tag position -> tree node\r
 129 static treeNode tagpos2node(int t) \r
 130  {\r
 131     return (treeNode) t;\r
 132  }\r
 133 // tree node -> tag position\r
 134 static int node2tagpos(treeNode x) \r
 135 {\r
 136   return (int)x;\r
 137 }\r
 138 \r
 139 \r
 140 class XMLTreeBuilder;\r
 141 \r
 142 class XMLTree {\r
 143 \r
 144   // Only the builder can access the constructor\r
 145   friend class XMLTreeBuilder;\r
 146 \r
 147  private:\r
 148    /** Balanced parentheses representation of the tree */\r
 149    bp *Par;\r
 150  \r
 151    /** Mapping from tag identifer to tag name */  \r
 152    std::vector<std::string> *TagName;\r
 153    TagIdMap * tIdMap;\r
 154   \r
 155    /** Bit vector indicating with a 1 the positions of the non-empty texts. */\r
 156    static_bitsequence *EBVector;  \r
 157                       \r
 158    /** Tag sequence represented with a data structure for rank and select */\r
 159    static_sequence *Tags;\r
 160    uint * tags_fix;\r
 161    uint tags_blen, tags_len;\r
 162 \r
 163    /** The texts in the XML document */\r
 164    TextCollection *Text;\r
 165 \r
 166    // Allows to disable the TextCollection for benchmarkin purposes\r
 167    bool disable_tc;\r
 168    \r
 169    FILE* stream;\r
 170    int   stream_fd; \r
 171    std::string * buffer;\r
 172    void myfputs(const char* s, FILE * fp){\r
 173      buffer->append(s);\r
 174      if (buffer->size() >= 100000){\r
 175        fputs(buffer->c_str(),fp);\r
 176        buffer->clear();\r
 177      };\r
 178 \r
 179    }\r
 180    void myfputc(const char c, FILE*fp){\r
 181      buffer->append(1,c);\r
 182      if (buffer->size() >= 100000){\r
 183        fputs(buffer->c_str(),fp);\r
 184        buffer->clear();\r
 185      };\r
 186    }\r
 187    void mybufferflush(FILE* fp){\r
 188      fputs(buffer->c_str(), fp);\r
 189      buffer->clear();\r
 190    }\r
 191 \r
 192    size_t myfprintf(const char* s, FILE * fp){\r
 193      if (s == NULL)\r
 194        return 0;\r
 195      size_t i = buffer->size();\r
 196      buffer->append(s);\r
 197      size_t j = buffer->size();\r
 198      if (buffer->size() >= 100000){\r
 199        fputs(buffer->c_str(),fp);\r
 200        buffer->clear();\r
 201      };\r
 202      return (j-i);\r
 203    }\r
 204 \r
 205    void PrintNode(treeNode n, int fd);\r
 206    /** Data structure constructors */\r
 207    XMLTree(){ buffer = 0;};\r
 208 \r
 209    // non const pointer are freed by this method.\r
 210    XMLTree( pb * const par, uint npar,  std::vector<std::string> * const TN,  TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags,\r
 211            TextCollection * const TC, bool dis_tc);\r
 212 \r
 213 public: \r
 214    /** Data structure destructor */\r
 215    ~XMLTree();\r
 216    \r
 217    /** root(): returns the tree root. */\r
 218    treeNode Root() { return 0; }\r
 219 \r
 220    /** Size() :  Number of parenthesis */\r
 221    unsigned int Size(){\r
 222      return tags_len/2;\r
 223    }\r
 224 \r
 225 \r
 226    /** NumTags() : Number of distinct tags */\r
 227    unsigned int NumTags() {\r
 228            return TagName->size();\r
 229    }\r
 230 \r
 231    int TagsBinaryLength(){ return tags_blen; };\r
 232    unsigned int TagStructLength(){ return uint_len(tags_blen,tags_len); };\r
 233    unsigned int * TagStruct() { return tags_fix; };\r
 234 \r
 235 \r
 236    /** SubtreeSize(x): the number of nodes (and attributes) in the subtree of \r
 237     * node x. */\r
 238    int SubtreeSize(treeNode x);\r
 239   \r
 240    /** SubtreeTags(x,tag): the number of occurrences of tag within the subtree \r
 241     * of node x. */\r
 242    int SubtreeTags(treeNode x, TagType tag);\r
 243    \r
 244    /** SubtreeElements(x) of element nodes in the subtree of x\r
 245     */\r
 246    int SubtreeElements(treeNode x);\r
 247 \r
 248    /** IsLeaf(x): returns whether node x is leaf or not. In the succinct \r
 249     * representation this is just a bit inspection. */\r
 250 \r
 251    bool IsLeaf(treeNode x);\r
 252 \r
 253    /** IsAncestor(x,y): returns whether node x is ancestor of node y. */\r
 254 \r
 255    bool IsAncestor(treeNode x, treeNode y);\r
 256   \r
 257    /** IsChild(x,y): returns whether node x is parent of node y. */\r
 258    bool IsChild(treeNode x, treeNode y);\r
 259 \r
 260    /** IsFirstChild(x): returns whether node x is the first child of its parent. */\r
 261    /* OCAML */\r
 262    bool IsFirstChild(treeNode x) { \r
 263            return ((x != NULLT)&&(x==Root() || prev_sibling(Par,x) == (treeNode)-1));\r
 264    };\r
 265      \r
 266    /** NumChildren(x): number of children of node x. Constant time with the \r
 267     * data structure of Sadakane. */\r
 268    int NumChildren(treeNode x);\r
 269 \r
 270    /** ChildNumber(x): returns i if node x is the i-th children of its \r
 271     * parent. */\r
 272    int ChildNumber(treeNode x);\r
 273 \r
 274    /** Depth(x): depth of node x, a simple binary rank on the parentheses \r
 275     * sequence. */\r
 276    int Depth(treeNode x);\r
 277    \r
 278    /** Preorder(x): returns the preorder number of node x, just regarding tree \r
 279     * nodes (and not texts). */ \r
 280    int Preorder(treeNode x);\r
 281    \r
 282    /** Postorder(x): returns the postorder number of node x, just regarding \r
 283     * tree nodes (and not texts). */\r
 284    int Postorder(treeNode x);\r
 285       \r
 286 \r
 287    /** DocIds(x): returns the range (i.e., a pair of integers) of document \r
 288     * identifiers that descend from node x. */\r
 289    range DocIds(treeNode x);\r
 290 \r
 291    /** Parent(x): returns the parent node of node x. */\r
 292    treeNode Parent(treeNode x) {           \r
 293     if (x == Root())\r
 294       return NULLT;\r
 295     else\r
 296       return parent(Par, x);\r
 297    };\r
 298    /* Assumes x is neither 0 nor -1 */\r
 299    \r
 300    /** Child(x,i): returns the i-th child of node x, assuming it exists. */   \r
 301    treeNode Child(treeNode x, int i);\r
 302 \r
 303 \r
 304 \r
 305    /** LastChild(x): returns the last child of node x.  */\r
 306    treeNode LastChild(treeNode x);\r
 307    \r
 308 \r
 309 \r
 310    /** PrevSibling(x): returns the previous sibling of node x, assuming it \r
 311     * exists. */\r
 312 \r
 313    treeNode PrevSibling(treeNode x);\r
 314    \r
 315    /** TaggedChild(x,tag): returns the first child of node x tagged tag, or \r
 316     * NULLT if there is none. Because of the balanced-parentheses representation \r
 317     * of the tree, this operation is not supported efficiently, just iterating \r
 318     * among the children of node x until finding the desired child. */\r
 319    treeNode TaggedChild(treeNode x, TagType tag);\r
 320    \r
 321    treeNode SelectChild(treeNode x, TagIdSet * tags);\r
 322 \r
 323    /** TaggedFollowingSibling(x,tag): returns the first sibling of node x tagged tag, or \r
 324     *  NULLT if there is none. */\r
 325    treeNode TaggedFollowingSibling(treeNode x, TagType tag);\r
 326    \r
 327    treeNode SelectFollowingSibling(treeNode x, TagIdSet * tags);\r
 328 \r
 329 \r
 330 \r
 331 \r
 332   treeNode SelectDescendant(treeNode x, TagIdSet * tags);\r
 333 \r
 334    /** TaggedPrec(x,tag): returns the first node tagged tag with smaller \r
 335     * preorder than x and not an ancestor of x. Returns NULLT if there \r
 336     * is none. */\r
 337    treeNode TaggedPreceding(treeNode x, TagType tag);\r
 338   \r
 339    /** TaggedFoll(x,tag): returns the first node tagged tag with larger \r
 340     * preorder than x and not in the subtree of x. Returns NULLT if there \r
 341     * is none. */\r
 342    treeNode TaggedFollowing(treeNode x, TagType tag);\r
 343 \r
 344 \r
 345 \r
 346    treeNode SelectFollowingBelow(treeNode x, TagIdSet * tags, treeNode ancestor);\r
 347 \r
 348    //   treeNode TaggedFollowingBefore(treeNode x, TagType tag,treeNode closing);\r
 349 \r
 350    treeNode SelectFollowingBefore(treeNode x, TagIdSet * tags, treeNode closing);\r
 351 \r
 352    /** TaggedAncestor(x, tag): returns the closest ancestor of x tagged \r
 353      * tag. Return NULLT is there is none. */\r
 354    treeNode TaggedAncestor(treeNode x, TagType tag);\r
 355  \r
 356    /** PrevText(x): returns the document identifier of the text to the left of \r
 357     * node x, or NULLT if x is the root node. */\r
 358    DocID PrevText(treeNode x);\r
 359    \r
 360    /** NextText(x): returns the document identifier of the text to the right of \r
 361     * node x, or NULLT if x is the root node. */\r
 362    DocID NextText(treeNode x);\r
 363    \r
 364    /** MyText(x): returns the document identifier of the text below node x, or \r
 365     * NULLT if x is not a leaf node. */\r
 366    DocID MyText(treeNode x);\r
 367    DocID MyTextUnsafe(treeNode x);\r
 368 \r
 369    /** TextXMLId(d): returns the preorder of document with identifier d in the \r
 370     * tree consisting of all tree nodes and all text nodes. */\r
 371    int TextXMLId(DocID d);\r
 372    \r
 373    /** NodeXMLId(x): returns the preorder of node x in the tree consisting of \r
 374     * all tree nodes and all text nodes. */\r
 375    int NodeXMLId(treeNode x);\r
 376    \r
 377    /** ParentNode(d): returns the parent node of document identifier d. */\r
 378    treeNode ParentNode(DocID d);\r
 379    \r
 380    treeNode PrevNode(DocID d);\r
 381 \r
 382    /** GetTagId(tagname): returns the tag identifier corresponding to a given \r
 383     * tag name. Returns NULLT in case that the tag name does not exists. */\r
 384    TagType GetTagId(unsigned char *tagname);\r
 385 \r
 386    /** GetTagName(tagid): returns the tag name of a given tag identifier. \r
 387     * Returns NULL in case that the tag identifier is not valid.*/\r
 388    unsigned char *GetTagName(TagType tagid);\r
 389 \r
 390    /** GetTagName(tagid): returns the tag name of a given tag identifier.     \r
 391     *  The result is just a reference and should not be freed by the caller.\r
 392     */\r
 393    const unsigned char *GetTagNameByRef(TagType tagid);\r
 394 \r
 395    /** RegisterTag adds a new tag to the tag collection this is needed\r
 396     * if the query contains a tag which is not in the document, we need\r
 397     * to give this new tag a fresh id and store it somewhere. A logical\r
 398     * choice is here.\r
 399     * We might want to use a hashtable instead of an array though.\r
 400     */\r
 401    TagType RegisterTag(unsigned char *tagname);\r
 402 \r
 403    bool EmptyText(DocID i) {\r
 404        return Text->EmptyText(i);\r
 405    }\r
 406 \r
 407    /** Prefix(s): search for texts prefixed by string s. */\r
 408    TextCollection::document_result Prefix(uchar const *s) {\r
 409       return Text->Prefix(s);\r
 410    }\r
 411 \r
 412    /** Suffix(s): search for texts having string s as a suffix. */\r
 413    TextCollection::document_result Suffix(uchar const *s) {\r
 414       return Text->Suffix(s);\r
 415    }\r
 416 \r
 417    /** Equal(s): search for texts equal to string s. */\r
 418    TextCollection::document_result Equals(uchar const *s) {\r
 419       return Text->Equal(s);\r
 420    }\r
 421 \r
 422    /** Contains(s): search for texts containing string s.  */\r
 423    TextCollection::document_result Contains(uchar const *s) {\r
 424       return Text->Contains(s);\r
 425    }\r
 426 \r
 427    /** LessThan(s): returns document identifiers for the texts that\r
 428     * are lexicographically smaller than string s. */\r
 429    TextCollection::document_result LessThan(uchar const *s) {\r
 430       return Text->LessThan(s);\r
 431    }\r
 432    \r
 433    /** IsPrefix(x): returns true if there is a text prefixed by string s. */\r
 434    bool IsPrefix(uchar const *s) {\r
 435       return Text->IsPrefix(s);\r
 436    }          \r
 437    \r
 438    /** IsSuffix(s): returns true if there is a text having string s as a \r
 439     * suffix.*/\r
 440    bool IsSuffix(uchar const *s) {\r
 441       return Text->IsSuffix(s);\r
 442    }\r
 443    \r
 444    /** IsEqual(s): returns true if there is a text that equals given \r
 445     * string s. */\r
 446    bool IsEqual(uchar const *s) {\r
 447       return Text->IsEqual(s);\r
 448    }\r
 449    \r
 450    /** IsContains(s): returns true if there is a text containing string s. */\r
 451    bool IsContains(uchar const *s) {\r
 452       return Text->IsContains(s);\r
 453    }\r
 454    \r
 455    /** IsLessThan(s): returns true if there is at least a text that is \r
 456     * lexicographically smaller than string s. */\r
 457    bool IsLessThan(uchar const *s) {\r
 458       return Text->IsLessThan(s);\r
 459    }\r
 460    \r
 461    /** Count(s): Global counting  */\r
 462    unsigned Count(uchar const *s) {\r
 463       return Text->Count(s);\r
 464    }\r
 465 \r
 466    /** CountPrefix(s): counting version of Prefix(s). */\r
 467    unsigned CountPrefix(uchar const *s) {\r
 468       return Text->CountPrefix(s);\r
 469    }\r
 470    \r
 471    /** CountSuffix(s): counting version of Suffix(s). */\r
 472    unsigned CountSuffix(uchar const *s) {\r
 473       return Text->CountSuffix(s);\r
 474    }\r
 475    \r
 476    /** CountEqual(s): counting version of Equal(s). */\r
 477    unsigned CountEqual(uchar const *s) {\r
 478       return Text->CountEqual(s);\r
 479    }\r
 480    \r
 481    /** CountContains(s): counting version of Contains(s). */\r
 482    unsigned CountContains(uchar const *s) {\r
 483       return Text->CountContains(s);\r
 484    }\r
 485    \r
 486    /** CountLessThan(s): counting version of LessThan(s). */\r
 487    unsigned CountLessThan(uchar const *s) {\r
 488       return Text->CountLessThan(s);\r
 489    }\r
 490    \r
 491    /** GetText(d): returns the text corresponding to document with\r
 492     * id d. */\r
 493    uchar* GetText(DocID d) {\r
 494      \r
 495        uchar * s = Text->GetText(d);\r
 496        return (s[0] == 1 ? (s+1) : s);\r
 497    }\r
 498 \r
 499    /** GetText(i, j): returns the texts corresponding to documents with\r
 500     * ids i, i+1, ..., j. Texts are separated by '\0' character.  */\r
 501    //   uchar* GetText(DocID i, DocID j) {\r
 502    //  uchar * s = Text->GetText(i, j);\r
 503    // return (s[0] == 1 ? (uchar*)"" : s);\r
 504    //}\r
 505 \r
 506    TextCollection *getTextCollection() {\r
 507       return Text;\r
 508    }\r
 509    \r
 510    /** Save: saves XML tree data structure to file. */\r
 511    void Save(int fd, char *filename);\r
 512       \r
 513    /** Load: loads XML tree data structure from file. sample_rate_text \r
 514     * indicates the sample rate for the text search data structure. */\r
 515    static XMLTree *Load(int fd, char *filename, bool load_tc, int sample_factor);   \r
 516 \r
 517    void insertTag(TagType tag, uint position);\r
 518    \r
 519    void print_stats();\r
 520 \r
 521    \r
 522    /** Parenthesis functions */\r
 523    treeNode Closing(treeNode x);\r
 524 \r
 525    bool IsOpen(treeNode x);\r
 526 \r
 527 \r
 528    /** Print procedure */\r
 529    void Print(int fd,treeNode x, bool no_text);\r
 530    void Print(int fd,treeNode x) { Print(fd,x,false); }\r
 531 \r
 532   // The following are inlined here for speed\r
 533   /** Tag(x): returns the tag identifier of node x. */\r
 534 \r
 535    inline TagType Tag(treeNode x) const throw () {\r
 536           if (tags_blen == 8)\r
 537                   return  (TagType) (((uchar*)tags_fix)[(int) x]);\r
 538           else \r
 539                   return get_field(tags_fix, tags_blen, x);\r
 540                   /*\r
 541                   { \r
 542           size_t idxlen = x * tags_blen;\r
 543           size_t j = idxlen % W;\r
 544           size_t i = idxlen / W; \r
 545           size_t offset = W - tags_blen;\r
 546           size_t offset2 = offset - j;\r
 547           size_t w = tags_fix[i];\r
 548           return (offset2 >= 0)\r
 549                   ? ( w << offset2 ) >> offset\r
 550                   : ( w >> j) | (tags_fix[i+1] << (W+offset2)) >> offset;\r
 551                   }; */\r
 552 \r
 553   }\r
 554 \r
 555      /** FirstChild(x): returns the first child of node x, or NULLT if the node is a leaf\r
 556     */\r
 557    treeNode FirstChild(treeNode x) {\r
 558            NULLT_IF(x==NULLT);\r
 559            return fast_first_child(Par, x);\r
 560    };\r
 561 \r
 562 \r
 563    /** FirstElement(x): returns the first non text, non attribute child of node x, or NULLT\r
 564     *    if none.\r
 565     */\r
 566    treeNode FirstElement(treeNode x){\r
 567      {\r
 568        NULLT_IF(x==NULLT);\r
 569        x = fast_first_child(Par, x);\r
 570        NULLT_IF(x == NULLT);\r
 571        switch (Tag(x)){\r
 572          \r
 573        case PCDATA_TAG_ID:\r
 574          x = x+2;\r
 575          return (fast_inspect(Par,x)==OP)? x : NULLT;\r
 576          \r
 577        case ATTRIBUTE_TAG_ID:  \r
 578          x = fast_next_sibling(Par,x);\r
 579          if (x != NULLT && Tag(x) == PCDATA_TAG_ID){\r
 580            x = x+2;\r
 581            return (fast_inspect(Par,x)==OP)? x : NULLT;\r
 582          } \r
 583          else return x;     \r
 584        default:\r
 585          return x;\r
 586        }\r
 587      }\r
 588    };\r
 589 \r
 590   /** NextSibling(x): returns the next sibling of node x, or NULLT if none \r
 591    * exists. */\r
 592   \r
 593   treeNode NextSibling(treeNode x) {\r
 594     NULLT_IF (x <= 0);\r
 595     return fast_next_sibling(Par, x);\r
 596   };\r
 597   \r
 598    /** NextElement(x): returns the first non text, non attribute sibling of node x, or NULLT\r
 599     *    if none.\r
 600     */\r
 601   treeNode NextElement(treeNode x)\r
 602   {\r
 603     NULLT_IF(x <= 0);\r
 604     x = fast_next_sibling(Par, x);\r
 605     NULLT_IF(x == NULLT);   \r
 606     if (Tag(x) == PCDATA_TAG_ID){\r
 607       x = x+2;\r
 608       return (fast_inspect(Par,x)==OP)? x : NULLT;\r
 609     }\r
 610     else return x;  \r
 611   };\r
 612      /** TaggedDesc(x,tag): returns the first node tagged tag with larger \r
 613     * preorder than x and within the subtree of x. Returns NULT if there \r
 614     * is none. */\r
 615   inline treeNode TaggedDescendant(treeNode x, TagType tag)\r
 616   {\r
 617     \r
 618           int s = (int) Tags->select_next(tag,node2tagpos(x));\r
 619           NULLT_IF (s == -1);\r
 620           \r
 621           treeNode y = tagpos2node(s); // transforms the tag position into a node position\r
 622           \r
 623           return (fast_is_ancestor(Par,x,y) ? y : NULLT);\r
 624   };\r
 625   \r
 626   inline treeNode TaggedFollowingBelow(treeNode x, TagType tag, treeNode ancestor)\r
 627   {\r
 628           treeNode close = fast_find_close(Par, x);\r
 629           treeNode s = tagpos2node(Tags->select_next(tag, close));\r
 630           \r
 631           if (ancestor == Root() || s == NULLT || s < fast_find_close(Par,ancestor)) return s;\r
 632           else return NULLT;\r
 633   };\r
 634 \r
 635   inline treeNode TaggedFollowingBefore(treeNode x, TagType tag, treeNode ancestor_closing)\r
 636   {\r
 637           treeNode close = fast_find_close(Par, x);\r
 638           treeNode s = tagpos2node(Tags->select_next(tag, close));\r
 639           \r
 640           if (ancestor_closing == Root() || s == NULLT || s < ancestor_closing) return s;\r
 641           else return NULLT;\r
 642   };\r
 643     \r
 644 };\r
 645 \r
 646 \r
 647 \r
 648 \r
 649 #endif\r
 650 \r