X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.h;h=a3ac2ed1540505bb6fd45b0cfa0e9b7ae548e865;hb=a4311418afa11f8e4dbcb3d0a5cc8354ff530efc;hp=a62abfed387bbaf81b69ae2b2101d75bb7deedd2;hpb=41ca14161f53191521d3ea3967ea9184a5a2ae39;p=SXSI%2FXMLTree.git diff --git a/XMLTree.h b/XMLTree.h index a62abfe..a3ac2ed 100644 --- a/XMLTree.h +++ b/XMLTree.h @@ -1,4 +1,3 @@ - /****************************************************************************** * Copyright (C) 2008 by Diego Arroyuelo * * Interface for the in-memory XQuery/XPath engine * @@ -34,7 +33,7 @@ #undef Wminusone #include "bp.h" - +#include #include #include #include @@ -84,7 +83,7 @@ typedef struct { #define ATTRIBUTE_DATA_CLOSE_TAG "/<@$>" - +typedef std::unordered_set TagIdSet; typedef std::unordered_map TagIdMap; typedef TagIdMap::const_iterator TagIdMapIT; @@ -92,6 +91,10 @@ typedef TagIdMap::const_iterator TagIdMapIT; (v)->push_back(t); } while (false) +// returns NULLT if the test is true +#define NULLT_IF(x) do { if (x) return NULLT; } while (0) + + class XMLTreeBuilder; class XMLTree { @@ -121,6 +124,8 @@ class XMLTree { // Allows to disable the TextCollection for benchmarkin purposes bool disable_tc; + FILE* stream; + int stream_fd; /** Data structure constructors */ XMLTree(){;}; @@ -135,35 +140,47 @@ public: /** root(): returns the tree root. */ treeNode Root(); - + + /** Size() : Number of parenthesis */ + unsigned int Size(){ + return tags_len/2; + } + /** SubtreeSize(x): the number of nodes (and attributes) in the subtree of * node x. */ int SubtreeSize(treeNode x); - + /** SubtreeTags(x,tag): the number of occurrences of tag within the subtree * of node x. */ int SubtreeTags(treeNode x, TagType tag); + /** SubtreeElements(x) of element nodes in the subtree of x + */ + int SubtreeElements(treeNode x); + /** IsLeaf(x): returns whether node x is leaf or not. In the succinct * representation this is just a bit inspection. */ + bool IsLeaf(treeNode x); - + /** IsAncestor(x,y): returns whether node x is ancestor of node y. */ + bool IsAncestor(treeNode x, treeNode y); /** IsChild(x,y): returns whether node x is parent of node y. */ bool IsChild(treeNode x, treeNode y); /** IsFirstChild(x): returns whether node x is the first child of its parent. */ + /* OCAML */ bool IsFirstChild(treeNode x); - + /** NumChildren(x): number of children of node x. Constant time with the * data structure of Sadakane. */ int NumChildren(treeNode x); - + /** ChildNumber(x): returns i if node x is the i-th children of its * parent. */ - inline int ChildNumber(treeNode x); + int ChildNumber(treeNode x); /** Depth(x): depth of node x, a simple binary rank on the parentheses * sequence. */ @@ -176,35 +193,51 @@ public: /** Postorder(x): returns the postorder number of node x, just regarding * tree nodes (and not texts). */ int Postorder(treeNode x); - + /** Tag(x): returns the tag identifier of node x. */ - TagType Tag(treeNode x); - + TagType Tag(treeNode x) { + if (tags_blen == 8) + return (TagType) (((uchar*)tags_fix)[(int) x]); + else + return (TagType) get_field(tags_fix,tags_blen, (int) x); + } + /** DocIds(x): returns the range (i.e., a pair of integers) of document * identifiers that descend from node x. */ range DocIds(treeNode x); - + /** Parent(x): returns the parent node of node x. */ treeNode Parent(treeNode x); + /* Assumes x is neither 0 nor -1 */ /** Child(x,i): returns the i-th child of node x, assuming it exists. */ treeNode Child(treeNode x, int i); - - /** FirstChild(x): returns the first child of node x, assuming it exists. - * Very fast in BP. */ + + /** FirstChild(x): returns the first child of node x, or NULLT if the node is a leaf + */ treeNode FirstChild(treeNode x); + + /** FirstElement(x): returns the first non text, non attribute child of node x, or NULLT + * if none. + */ treeNode FirstElement(treeNode x); /** LastChild(x): returns the last child of node x. */ treeNode LastChild(treeNode x); - /** NextSibling(x): returns the next sibling of node x, assuming it + /** NextSibling(x): returns the next sibling of node x, or NULLT if none * exists. */ + treeNode NextSibling(treeNode x); + + /** NextElement(x): returns the first non text, non attribute sibling of node x, or NULLT + * if none. + */ treeNode NextElement(treeNode x); /** PrevSibling(x): returns the previous sibling of node x, assuming it * exists. */ + treeNode PrevSibling(treeNode x); /** TaggedChild(x,tag): returns the first child of node x tagged tag, or @@ -213,38 +246,38 @@ public: * among the children of node x until finding the desired child. */ treeNode TaggedChild(treeNode x, TagType tag); - treeNode SelectChild(treeNode x, std::unordered_set * tags); + treeNode SelectChild(treeNode x, TagIdSet * tags); - /** TaggedFollSibling(x,tag): returns the first sibling of node x tagged tag, or + /** TaggedFollowingSibling(x,tag): returns the first sibling of node x tagged tag, or * NULLT if there is none. */ - treeNode TaggedFollSibling(treeNode x, TagType tag); + treeNode TaggedFollowingSibling(treeNode x, TagType tag); - treeNode SelectFollSibling(treeNode x, std::unordered_set * tags); + treeNode SelectFollowingSibling(treeNode x, TagIdSet * tags); /** TaggedDesc(x,tag): returns the first node tagged tag with larger * preorder than x and within the subtree of x. Returns NULT if there * is none. */ - treeNode TaggedDesc(treeNode x, TagType tag); - - treeNode SelectDesc(treeNode x, std::unordered_set * tags); + treeNode TaggedDescendant(treeNode x, TagType tag); + treeNode SelectDescendant(treeNode x, TagIdSet * tags); /** TaggedPrec(x,tag): returns the first node tagged tag with smaller * preorder than x and not an ancestor of x. Returns NULLT if there * is none. */ - treeNode TaggedPrec(treeNode x, TagType tag); + treeNode TaggedPreceding(treeNode x, TagType tag); /** TaggedFoll(x,tag): returns the first node tagged tag with larger * preorder than x and not in the subtree of x. Returns NULLT if there * is none. */ - treeNode TaggedFoll(treeNode x, TagType tag); + treeNode TaggedFollowing(treeNode x, TagType tag); - treeNode TaggedFollBelow(treeNode x, TagType tag,treeNode root); - - treeNode SelectFollBelow(treeNode x, std::unordered_set * tags, treeNode root); + treeNode TaggedFollowingBelow(treeNode x, TagType tag,treeNode ancestor); - /** TaggedFollowingSibling(x,tag) */ - treeNode TaggedFollowingSibling(treeNode x, TagType tag); + treeNode SelectFollowingBelow(treeNode x, TagIdSet * tags, treeNode ancestor); + + treeNode TaggedFollowingBefore(treeNode x, TagType tag,treeNode closing); + + treeNode SelectFollowingBefore(treeNode x, TagIdSet * tags, treeNode closing); /** TaggedAncestor(x, tag): returns the closest ancestor of x tagged * tag. Return NULLT is there is none. */ @@ -261,7 +294,8 @@ public: /** MyText(x): returns the document identifier of the text below node x, or * NULLT if x is not a leaf node. */ DocID MyText(treeNode x); - + DocID MyTextUnsafe(treeNode x); + /** TextXMLId(d): returns the preorder of document with identifier d in the * tree consisting of all tree nodes and all text nodes. */ int TextXMLId(DocID d); @@ -311,7 +345,7 @@ public: } /** Equal(s): search for texts equal to string s. */ - TextCollection::document_result Equal(uchar const *s) { + TextCollection::document_result Equals(uchar const *s) { return Text->Equal(s); } @@ -387,8 +421,9 @@ public: /** GetText(d): returns the text corresponding to document with * id d. */ uchar* GetText(DocID d) { - uchar * s = Text->GetText(d); - return (s[0] == 1 ? (uchar*)"" : s); + + uchar * s = Text->GetText(d); + return (s[0] == 1 ? (uchar*)"" : s); } /** GetText(i, j): returns the texts corresponding to documents with @@ -407,15 +442,22 @@ public: /** Load: loads XML tree data structure from file. sample_rate_text * indicates the sample rate for the text search data structure. */ - static XMLTree *Load(int fd); + static XMLTree *Load(int fd,bool load_tc, int sample_factor); void insertTag(TagType tag, uint position); void print_stats(); + + /** Parenthesis functions */ treeNode Closing(treeNode x); + bool IsOpen(treeNode x); + + /** Print procedure */ + void Print(int fd,treeNode x); + }; #endif