#include "basics.h"\r
-#include <cstring>\r
-#include <sstream>\r
+//#include <cstring>\r
#include <stack>\r
#include "XMLTree.h"\r
-#include <sys/time.h>\r
-#include <time.h>\r
-#include <sys/types.h>\r
-#include <sys/stat.h> \r
-#include <unistd.h>\r
-\r
-static double tLoading = 0;\r
-\r
-static unsigned int cLoading = 0;\r
-static struct timeval tmpv1;\r
-static struct timeval tmpv2;\r
-static string mem1;\r
-static string mem2;\r
-\r
-void read_procmem(string& memstr) {\r
- std::string buf;\r
- pid_t pid = getpid();\r
- std::stringstream path;\r
- path << "/proc/" << pid << "/status";\r
- std::ifstream infile;\r
- infile.open (path.str().c_str(), std::ifstream::in);\r
- while (infile.good()){\r
- getline(infile,buf);\r
- if (infile.eof()) {\r
- memstr = "Could not read memory";\r
- return;\r
- };\r
- int idx = buf.find("VmRSS");\r
- if (idx == 0){\r
- memstr = buf;\r
- return;\r
- };\r
- };\r
- memstr = "Could not read memory";\r
- return;\r
-\r
-}\r
-\r
-#define STARTTIMER() do { \\r
- gettimeofday(&tmpv1,NULL); \\r
- read_procmem(mem1); \\r
- } while (0) \\r
-\r
-#define STOPTIMER(x) do { \\r
- read_procmem(mem2); \\r
- gettimeofday(&tmpv2,NULL); \\r
- (t##x) = ((tmpv2.tv_sec - tmpv1.tv_sec) * 1000000.0 + \\r
- (tmpv2.tv_usec - tmpv1.tv_usec))/1000.0; \\r
- (c##x)= (c##x)+1; \\r
- } while (0)\r
-\r
-#define PRINTTIME(s,x) do { \\r
- std::cerr << (s) << " : " << (t##x) << "ms" << std::endl; \\r
- std::cerr << "Mem use before: " << mem1 << std::endl; \\r
- std::cerr << "Mem use after: " << mem2 << std::endl; \\r
- std::cerr.flush(); \\r
- } while (0) \\r
-\r
+#include "timings.h"\r
\r
// functions to convert tag positions to the corresponding tree node and viceversa. \r
// These are implemented in order to be able to change the tree and Tags representations, \r
// the tree, and storing 2 tags per tree node (opening and closing tags).\r
\r
// tag position -> tree node\r
-inline treeNode tagpos2node(int t) \r
+static treeNode tagpos2node(int t) \r
{\r
return (treeNode) t;\r
}\r
\r
+static int bits8 (int t ) {\r
+ int r = bits(t);\r
+ if (r <= 8)\r
+ return 8;\r
+ else if (r <= 16)\r
+ return 16;\r
+ else \r
+ return r;\r
+}\r
+\r
// tree node -> tag position\r
-inline int node2tagpos(treeNode x) \r
+static int node2tagpos(treeNode x) \r
{\r
return (int)x;\r
}\r
\r
-int fast_find_close(bp *b,int s)\r
+static int fast_find_close(bp *b,int s)\r
{\r
return fwd_excess(b,s,-1);\r
}\r
\r
-inline int fast_inspect(bp* Par,treeNode i)\r
+static int fast_inspect(bp* Par,treeNode i)\r
{\r
int j,l;\r
j = i >> logD;\r
return (Par->B[j] >> (D-1-l)) & 1;\r
}\r
\r
-inline treeNode fast_first_child(bp *Par, treeNode x)\r
+static treeNode fast_first_child(bp *Par, treeNode x)\r
{\r
x = x+1;\r
- return (fast_inspect(Par,x) == CP) ? NULLT : x;\r
+ return (fast_inspect(Par,x) == OP) ? x : NULLT;\r
}\r
\r
-inline treeNode fast_next_sibling(bp* Par,treeNode x)\r
+static treeNode fast_next_sibling(bp* Par,treeNode x)\r
{\r
- x = fast_find_close(Par,x)+1;\r
- return (fast_inspect(Par,x) == CP) ? NULLT : x;\r
+ x = fwd_excess(Par,x,0);\r
+ return (fast_inspect(Par,x) == OP) ? x : NULLT;\r
}\r
\r
\r
-inline treeNode fast_sibling(bp* Par,treeNode x,TagType tag){\r
+static treeNode fast_sibling(bp* Par,treeNode x,TagType tag){\r
\r
if (tag == PCDATA_TAG_ID){\r
x = x+2;\r
\r
}\r
\r
-inline bool fast_isleaf(bp* Par,treeNode x){\r
+static bool fast_isleaf(bp* Par,treeNode x){\r
return (fast_inspect(Par,x+1) == CP ? true : false);\r
}\r
\r
-inline uint fast_get_field(uint* A,int len, int idx)\r
+\r
+inline uint get_field_no_power(uint *A, uint len, uint index) {\r
+ \r
+ register uint i=index*len/W, j=index*len-W*i;\r
+ return (j+len <= W) ? (A[i] << (W-j-len)) >> (W-len) : (A[i] >> j) | (A[i+1] << (WW-j-len)) >> (W-len);\r
+\r
+}\r
+\r
+static uint fast_get_field(uint* A,int len, int idx)\r
{\r
- switch(len){\r
+ uint f1, f2;\r
+ switch (len) {\r
case 8:\r
- return (uint) (((uchar*)A)[idx]);\r
- // Other 8-alligned values need to take care of the endianess of the arch.\r
- default: \r
- return get_field (A,len,idx);\r
+ return (uint) (((uchar*)A)[idx]);\r
+ case 16:\r
+ f2 = ((unsigned short*)A)[idx];\r
+ f1 = ((unsigned short*)A)[idx+1];\r
+ return (f1 << 16) + f2;\r
+ default:\r
+ return get_field_no_power (A,len,idx);\r
};\r
\r
}\r
\r
inline bool fast_is_ancestor(bp * Par,treeNode x,treeNode y){\r
-\r
if (x > y) \r
return false;\r
else\r
- return (y <= fast_find_close(Par,x));\r
+ return (x==0) || (y <= fast_find_close(Par,x));\r
}\r
\r
\r
{\r
// creates the data structure for the tree topology\r
Par = (bp *)umalloc(sizeof(bp));\r
- bp_construct(Par, npar, (pb*) par, OPT_DEGREE|0); \r
+ STARTTIMER();\r
+ bp_construct(Par, npar, (pb*) par, OPT_DEGREE|0);\r
+ STOPTIMER(Building);\r
+ PRINTTIME("Building parenthesis struct", Building);\r
+ STARTTIMER();\r
+\r
\r
// creates structure for tags\r
\r
alphabet_mapper *am = new alphabet_mapper_none();\r
Tags = new static_sequence_bs((uint*)tags,npar,am,bmb);\r
\r
- cout << "Tags test: " << Tags->test((uint*)tags,npar) << endl;\r
+ //cout << "Tags test: " << Tags->test((uint*)tags,npar) << endl;\r
\r
//Ensures that for small tag numbers, we are on an 8bit boundary.\r
//Makes tag access way faster with negligeable waste of space.\r
- tags_blen = max(bits(max_tag),8);\r
+ tags_blen = bits8(max_tag);\r
std::cerr << "Tags blen is " << tags_blen << "\n";\r
tags_len = (uint)npar;\r
tags_fix = new uint[uint_len(tags_blen,tags_len)];\r
for(uint i=0;i<(uint)npar;i++)\r
set_field(tags_fix,tags_blen,i,tags[i]);\r
-\r
delete bmb; \r
free(tags);\r
tags = NULL;\r
+\r
+ STOPTIMER(Building);\r
+ PRINTTIME("Building Tag Structure", Building);\r
\r
Text = (TextCollection*) TC;\r
\r
disable_tc = dis_tc;\r
stream = NULL;\r
stream_fd = 0;\r
+ std::cerr << "Number of distinct tags " << TagName->size() << "\n";\r
+ //std::cerr.flush();\r
}\r
\r
\r
/// End ugly tests\r
\r
STOPTIMER(Loading);\r
+ std::cerr << (uint_len(XML_Tree->tags_blen,XML_Tree->tags_len)*sizeof(uint))/(1024*1024) << " MB for tag sequence" << std::endl;\r
PRINTTIME("Loading tag struct", Loading);\r
STARTTIMER();\r
\r
}\r
\r
\r
-// root(): returns the tree root.\r
-inline treeNode XMLTree::Root() \r
- {\r
- return 0; //root_node(Par);\r
- }\r
\r
// SubtreeSize(x): the number of nodes (and attributes) in the subtree of node x.\r
int XMLTree::SubtreeSize(treeNode x) \r
NULLT_IF(x==NULLT);\r
x = fast_first_child(Par, x);\r
NULLT_IF(x == NULLT);\r
- TagType tag = Tag(x);\r
- if (tag == PCDATA_TAG_ID){\r
+ switch (Tag(x)){\r
+ \r
+ case PCDATA_TAG_ID:\r
x = x+2;\r
return (fast_inspect(Par,x)==OP)? x : NULLT;\r
- } \r
- else if (tag == ATTRIBUTE_TAG_ID){\r
+ \r
+ case ATTRIBUTE_TAG_ID: \r
x = fast_next_sibling(Par,x);\r
if (x != NULLT && Tag(x) == PCDATA_TAG_ID){\r
x = x+2;\r
return (fast_inspect(Par,x)==OP)? x : NULLT;\r
} \r
- else return x; \r
- }else return x;\r
+ else return x; \r
+ default:\r
+ return x;\r
+ }\r
}\r
\r
treeNode XMLTree::NextElement(treeNode x) \r
}\r
else return x; \r
}\r
+value XMLTree::CamlFirstElement(value x)\r
+{\r
+ return Val_int(FirstElement(Int_val(x)));\r
+}\r
+value XMLTree::CamlNextElement(value x)\r
+{\r
+ return Val_int(NextElement(Int_val(x)));\r
+}\r
+\r
+extern "C" value caml_cpp_fast_first_element(value xmltree, value node){\r
+ return XMLTREE(xmltree)->CamlFirstElement(node);\r
+}\r
+\r
+extern "C" value caml_cpp_fast_next_element(value xmltree, value node){\r
+ return XMLTREE(xmltree)->CamlNextElement(node);\r
+}\r
\r
// LastChild(x): returns the last child of node x.\r
treeNode XMLTree::LastChild(treeNode x)\r
{\r
- NULLT_IF(NULLT || x == Root() || fast_isleaf(Par,x));\r
+ NULLT_IF(x == NULLT || fast_isleaf(Par,x));\r
return find_open(Par, fast_find_close(Par, x)-1);\r
}\r
\r
// PrevSibling(x): returns the previous sibling of node x, assuming it exists.\r
treeNode XMLTree::PrevSibling(treeNode x) \r
{\r
- NULLT_IF(x==NULLT || x == Root());\r
+ NULLT_IF(x==NULLT);\r
return prev_sibling(Par, x);\r
}\r
\r
// the subtree of x. Returns NULLT if there is none.\r
treeNode XMLTree::TaggedDescendant(treeNode x, TagType tag) \r
{\r
- NULLT_IF(x==NULLT || fast_isleaf(Par,x));\r
+ //NULLT_IF(x==NULLT || fast_isleaf(Par,x));\r
\r
int s = (int) Tags->select_next(tag,node2tagpos(x));\r
NULLT_IF (s == -1);\r
// and not in the subtree of x. Returns NULLT if there is none.\r
treeNode XMLTree::TaggedFollowingBelow(treeNode x, TagType tag, treeNode ancestor)\r
{\r
- NULLT_IF (x == NULLT || x == Root() || x == ancestor); \r
- treeNode s = tagpos2node(Tags->select_next(tag, fast_find_close(Par, x)));\r
- \r
- if (ancestor == Root()) return s;\r
- NULLT_IF (s == NULLT || s >= fast_find_close(Par, ancestor));\r
+ // NULLT_IF (x == NULLT || x == Root() || x == ancestor); \r
+\r
+ //Special optimisation, test for the following sibling first\r
+ treeNode close = fast_find_close(Par, x);\r
+ /*\r
+ treeNode ns = close+1;\r
+ if (fast_inspect(Par,ns) == OP) {\r
+ TagType tagns = Tag(ns);\r
+ // cout << GetTagNameByRef(tagns) << endl;\r
+ //cout.flush();\r
+ if (tagns == PCDATA_TAG_ID){\r
+ close = ns+1;\r
+ ns = ns+2;\r
+ if (fast_inspect(Par,ns) != OP)\r
+ goto after;\r
+ tagns = Tag(ns); \r
+ };\r
+ if (tagns == tag)\r
+ return ns;\r
+ };\r
+ after:\r
+ */\r
+ treeNode s = tagpos2node(Tags->select_next(tag, close));\r
\r
- return s;\r
+ if (ancestor == Root() || s==NULLT || s < fast_find_close(Par,ancestor)) return s;\r
+ else return NULLT;\r
} \r
\r
treeNode XMLTree::TaggedFollowingBefore(treeNode x, TagType tag, treeNode closing)\r
{\r
\r
NULLT_IF(x==NULLT || x==Root());\r
+\r
+ treeNode close = fast_find_close(Par,x);\r
+ treeNode ns = close+1;\r
+ if ( (fast_inspect(Par,ns) == OP) && (tags->find(Tag(ns)) != tags->end()))\r
+ return ns;\r
+\r
int i;\r
treeNode min = NULLT;\r
- treeNode ns = fast_next_sibling(Par, x);\r
- treeNode close = ns - 1;\r
treeNode aux;\r
+ \r
+\r
TagIdSet::const_iterator tagit;\r
for (tagit = tags->begin(); tagit != tags->end(); tagit++) {\r
\r
\r
// The next sibling of x is guaranteed to be below ctx\r
// and is the node with lowest preorder which is after ctx.\r
- // if we find it, we return early;\r
- \r
- if (aux == ns ) return ns;\r
+ // if we find it, we return early; \r
if (aux == NULLT) continue;\r
if ((min == NULLT) || (aux < min)) min = aux;\r
};\r