\r
#ifndef XMLTREE_H_\r
#define XMLTREE_H_\r
+extern "C" {\r
+#define CAML_NAME_SPACE\r
+#include <caml/mlvalues.h>\r
+#include <caml/custom.h>\r
+#define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x)))\r
+ //#define XMLTREE(x) ((XMLTree*) (x))\r
+}\r
#include <unordered_set>\r
#include <unordered_map>\r
+#include <sstream>\r
#include "TextCollection/TextCollectionBuilder.h"\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
+\r
+#include <cstdio>\r
+#include <cstdlib>\r
#include <cstring>\r
\r
\r
using SXSI::TextCollectionBuilder;\r
\r
\r
+\r
// this constant is used to efficiently compute the child operation in the tree\r
#define OPTD 10\r
\r
#define PCDATA_TAG_ID 2\r
#define ATTRIBUTE_DATA_OPEN_TAG "<@$>"\r
#define ATTRIBUTE_DATA_TAG_ID 3\r
+#define CLOSING_TAG "</>"\r
+#define CLOSING_TAG_ID 4\r
#define DOCUMENT_CLOSE_TAG "/"\r
#define ATTRIBUTE_CLOSE_TAG "/<@>"\r
#define PCDATA_CLOSE_TAG "/<$>"\r
#define NULLT_IF(x) do { if (x) return NULLT; } while (0)\r
\r
\r
+\r
+\r
+\r
+\r
class XMLTreeBuilder;\r
\r
class XMLTree {\r
~XMLTree();\r
\r
/** root(): returns the tree root. */\r
- treeNode Root();\r
+ treeNode Root() { return 0; }\r
\r
/** Size() : Number of parenthesis */\r
unsigned int Size(){\r
* if none.\r
*/\r
treeNode FirstElement(treeNode x);\r
-\r
+ value CamlFirstElement(value x);\r
/** LastChild(x): returns the last child of node x. */\r
treeNode LastChild(treeNode x);\r
\r
* if none.\r
*/\r
treeNode NextElement(treeNode x);\r
- \r
+ value CamlNextElement(value x);\r
/** PrevSibling(x): returns the previous sibling of node x, assuming it \r
* exists. */\r
\r
void Print(int fd,treeNode x);\r
\r
};\r
+\r
+extern "C" value caml_cpp_fast_first_element(value xmltree, value node);\r
+extern "C" value caml_cpp_fast_next_element(value xmltree, value node);\r
+\r
+\r
+\r
#endif\r
\r
#include "basics.h"\r
#include "XMLTreeBuilder.h"\r
-\r
+#include "timings.h"\r
\r
XMLTreeBuilder::~XMLTreeBuilder(){\r
\r
npar = 0;\r
parArraySize = 1;\r
disable_tc = dtc;\r
- \r
+\r
+ STARTTIMER();\r
\r
par_aux = (pb *)umalloc(sizeof(pb)*parArraySize);\r
\r
REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_OPEN_TAG);\r
REGISTER_TAG(TagName,tIdMap,PCDATA_OPEN_TAG);\r
REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_OPEN_TAG);\r
+ REGISTER_TAG(TagName,tIdMap,CLOSING_TAG);\r
REGISTER_TAG(TagName,tIdMap,DOCUMENT_CLOSE_TAG);\r
REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_CLOSE_TAG);\r
REGISTER_TAG(TagName,tIdMap,PCDATA_CLOSE_TAG);\r
//npar++;\r
\r
// makes the text collection static\r
+ STOPTIMER(Parsing);\r
+ PRINTTIME("Parsing XML Document", Parsing);\r
+\r
if (!disable_tc) {\r
assert(Text == 0);\r
assert(TextBuilder != 0);\r
+ STARTTIMER();\r
Text = TextBuilder->InitTextCollection();\r
delete TextBuilder;\r
TextBuilder = 0;\r
+ STOPTIMER(Building);\r
+ PRINTTIME("Building TextCollection", Building);\r
+\r
}\r
\r
XMLTree *T = new XMLTree(par_aux,\r
\r
setbit(par_aux,npar,CP); // marks a new closing parenthesis\r
\r
- tagname.insert(0,"/");\r
+ //tagname.insert(0,"/");\r
\r
- TagIdMapIT tag_id = tIdMap->find(tagname); \r
+ //TagIdMapIT tag_id = tIdMap->find(tagname); \r
\r
- if (tag_id == tIdMap->end()){\r
- REGISTER_TAG(TagName,tIdMap,tagname);\r
- i = TagName->size() - 1;\r
- }\r
- else\r
- i = tag_id->second;\r
+ // if (tag_id == tIdMap->end()){\r
+ // REGISTER_TAG(TagName,tIdMap,tagname);\r
+ // i = TagName->size() - 1;\r
+ // }\r
+ // else\r
+ // i = tag_id->second;\r
\r
tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
\r
- tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags\r
+ tags_aux[npar] = CLOSING_TAG_ID; // inserts the new tag id within the preorder sequence of tags\r
\r
npar++;\r
\r
\r
#ifndef XMLTREEBUILDER_H_\r
#define XMLTREEBUILDER_H_\r
+\r
+#include <cstdlib>\r
+#include <cstdio>\r
+#include <cstring>\r
#include <unordered_map>\r
#include "TextCollection/TextCollectionBuilder.h"\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <cstring>\r
-\r
-\r
#undef W\r
#undef WW\r
#undef Wminusone\r
#include <static_bitsequence.h>\r
#include <alphabet_mapper.h>\r
#include <static_sequence.h>\r
+\r
using SXSI::TextCollection;\r
using SXSI::TextCollectionBuilder;\r
\r
#define RANDOM\r
\r
int msize=0;\r
-#define mymalloc(p,n,f) {p =(__typeof__(p)) malloc((n)*sizeof(*p)); msize += (f)*(n)*sizeof(*p); /* if (f) printf("malloc %d bytes at line %d total %d\n",(n)*sizeof(*p),__LINE__,msize); */ if ((p)==NULL) {printf("not enough memory (%d bytes) in line %d\n",msize,__LINE__); exit(1);};}\r
+#define mymalloc(p,n,f) { \\r
+ p = (__typeof__(p)) malloc((n)*sizeof(*p)); \\r
+if ((p)==NULL) {printf("not enough memory (%d bytes) in line %d\n",msize,__LINE__); \\r
+ exit(1);}; \\r
+msize += (f)*(n)*sizeof(*p); \\r
+;}\r
\r
int postorder_select_bsearch(bp *b,int s);\r
\r
//#define logMB 3
#define MB (1<<logMB)
-#define ETW 8 // width of excess lookup table
+#define ETW 8 // width of excess lookup table
#define W1 2 // branching factor
#ifndef min
return x;\r
}\r
\r
+int setbit_zero(pb *B, int i)\r
+{\r
+ int j,l;\r
+ j = i >> logD;\r
+ l = i & (D-1);\r
+ B[j] &= (~(1<<(D-1-l)));\r
+ return 0;\r
+}\r
+\r
+int setbit_one(pb *B, int i)\r
+{\r
+ int j,l;\r
+ j = i >> logD;\r
+ l = i & (D-1);\r
+ B[j] |= (1<<(D-1-l));\r
+ return 1;\r
+\r
+}\r
+\r
+\r
+\r
int setbits(pb *B, int i, int d, int x)\r
{\r
int j;\r
\r
static int selecttbl[8*256];\r
\r
-unsigned int popcount(pb x)\r
+unsigned int popcount_old(pb x)\r
{\r
pb r;\r
#if 0\r
return r;\r
}\r
\r
+inline unsigned int\r
+popcount(pb x) \r
+{\r
+ uint m1 = 0x55555555;\r
+ uint m2 = 0xc30c30c3;\r
+ x -= (x >> 1) & m1;\r
+ x = (x & m2) + ((x >> 2) & m2) + ((x >> 4) & m2);\r
+ x += x >> 6;\r
+ return (x + (x >> 12) + (x >> 24)) & 0x3f;\r
+}\r
+\r
+\r
unsigned int popcount8(pb x)\r
{\r
dword r;\r
libcompact:
@echo " [MSG] Entering directory src"
@make --no-print-directory -C src
-
+
tests: libcompact
@echo " [MSG] Entering directory tests"
@make --no-print-directory -C tests
CPP=g++
-#CPPFLAGS=-g3 -Wall -O0
-CPPFLAGS=-O9 -Wall -DNDEBUG
+CPPFLAGS=-O3 -Wall -DNDEBUG -fno-PIC
INCL=-I../includes/
return __popcount_tab[(x >> 0) & 0xff] + __popcount_tab[(x >> 8) & 0xff]
+ __popcount_tab[(x >> 16) & 0xff] + __popcount_tab[(x >> 24) & 0xff];
}
+inline unsigned int
+fast_popcount(int x)
+{
+ uint m1 = 0x55555555;
+ uint m2 = 0x33333333;
+ uint m4 = 0x0f0f0f0f;
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m4;
+ x += x >> 8;
+ return (x + (x >> 16)) & 0x3f;
+}
+
+
/** Counts the number of 1s in the first 16 bits of x */
inline uint popcount16(int x){
3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
};
+static inline unsigned int
+_fast_popcount2(int x)
+{
+ uint m1 = 0x55555555;
+ uint m2 = 0x33333333;
+ uint m4 = 0x0f0f0f0f;
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m4;
+ x += x >> 8;
+ return (x + (x >> 16)) & 0x3f;
+}
+
+static inline unsigned int
+_fast_popcount3(int x)
+{
+ uint m1 = 0x55555555;
+ uint m2 = 0xc30c30c3;
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2) + ((x >> 4) & m2);
+ x += x >> 6;
+ return (x + (x >> 12) + (x >> 24)) & 0x3f;
+}
+
+static inline unsigned int
+_fast_popcount(int x) {
+ return _popCount[x];
+}
static unsigned int __selecttbl[8*256];
static int built = 0;
if (f == 1) {
rr = p & (8-1);
- r -= _popCount[*q >> (8-1-rr)];
+ //r -= _popCount[*q >> (8-1-rr)];
+ r -= _fast_popcount(*q >> (8-1-rr));
//p = p - rr;
while (1) {
- rr = _popCount[*q];
+ //rr = _popCount[*q];
+ rr = _fast_popcount(*q);
if (r + rr >= i) break;
r += rr;
//p += 8;
}
else {
rr = p & (8-1);
- r -= _popCount[(*q ^ 0xff) >> (8-1-rr)];
+ //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)];
+ r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr));
//p = p - rr;
while (1) {
- rr = _popCount[*q ^ 0xff];
+ //rr = _popCount[*q ^ 0xff];
+ rr = _fast_popcount(*q ^ 0xff);
if (r + rr >= i) break;
r += rr;
//p += 8;
if (f == 1) {
rr = p & (8-1);
- r -= _popCount[*q >> (8-1-rr)];
+ //r -= _popCount[*q >> (8-1-rr)];
+ r -= _fast_popcount(*q >> (8-1-rr));
//p = p - rr;
while (1) {
- rr = _popCount[*q];
+ //rr = _popCount[*q];
+ rr = _fast_popcount(*q);
if (r + rr >= i) break;
r += rr;
//p += 8;
if ((i>>logL) == ((i+1)>>logL)) {
i++;
while (1) {
- rr = _popCount[*q];
+ //rr = _popCount[*q];
+ r = _fast_popcount(*q);
if (r + rr >= i) break;
r += rr;
q++;
}
else {
rr = p & (8-1);
- r -= _popCount[(*q ^ 0xff) >> (8-1-rr)];
+ //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)];
+ r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr));
//p = p - rr;
while (1) {
- rr = _popCount[*q ^ 0xff];
+ //rr = _popCount[*q ^ 0xff];
+ rr = _fast_popcount(*q ^ 0xff);
if (r + rr >= i) break;
r += rr;
//p += 8;
if ((i>>logL) == ((i+1)>>logL)) {
i++;
while (1) {
- rr = _popCount[*q ^ 0xff];
+ //rr = _popCount[*q ^ 0xff];
+ rr = _fast_popcount(*q ^ 0xff);
if (r + rr >= i) break;
r += rr;
q++;
if(am->map(c)>=sigma) return (uint)-1;
return bitmaps[am->map(c)]->rank1(i);
}
-
+/*
uint static_sequence_bs::select(uint c, uint i) {
if(am->map(c)>=sigma) return (uint)-1;
return bitmaps[am->map(c)]->select1(i);
if(am->map(c)>=sigma) return (uint)-1;
return bitmaps[am->map(c)]->select_next1(i);
}
-
+*/
+uint static_sequence_bs::select(uint c, uint i) {
+ if(c>=sigma) return (uint)-1;
+ return bitmaps[c]->select1(i);
+}
+uint static_sequence_bs::select_next(uint c, uint i) {
+ if(c>=sigma) return (uint)-1;
+ return bitmaps[c]->select_next1(i);
+}
uint static_sequence_bs::access(uint i) {
for(uint j=0;j<sigma;j++) {
if(bitmaps[j]->access(i)) return am->unmap(j);
--- /dev/null
+#ifndef TIMINGS_H_
+#define TIMINGS_H_
+//Timing Macro's
+#include <sys/time.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+static double tParsing = 0;
+static unsigned int cParsing = 0;
+
+static double tLoading = 0;
+static unsigned int cLoading = 0;
+static double tBuilding = 0;
+static unsigned int cBuilding = 0;
+
+static struct timeval tmpv1;
+static struct timeval tmpv2;
+static string mem1;
+static string mem2;
+
+static void read_procmem(string& memstr) {
+ std::string buf;
+ pid_t pid = getpid();
+ std::stringstream path;
+ path << "/proc/" << pid << "/status";
+ std::ifstream infile;
+ infile.open (path.str().c_str(), std::ifstream::in);
+ while (infile.good()){
+ getline(infile,buf);
+ if (infile.eof()) {
+ memstr = "Could not read memory";
+ return;
+ };
+ int idx = buf.find("VmRSS");
+ if (idx == 0){
+ memstr = buf;
+ return;
+ };
+ };
+ memstr = "Could not read memory";
+ return;
+
+}
+
+#define STARTTIMER() do { \
+ read_procmem(mem1); \
+ gettimeofday(&tmpv1,NULL); \
+ } while (0) \
+
+#define STOPTIMER(x) do { \
+ gettimeofday(&tmpv2,NULL); \
+ read_procmem(mem2); \
+ (t##x) = ((tmpv2.tv_sec - tmpv1.tv_sec) * 1000000.0 + \
+ (tmpv2.tv_usec - tmpv1.tv_usec))/1000.0; \
+ (c##x)= (c##x)+1; \
+ } while (0)
+
+#define PRINTTIME(s,x) do { \
+ std::cerr << (s) << " : " << (t##x) << "ms" << std::endl; \
+ std::cerr << "Mem use before: " << mem1 << std::endl; \
+ std::cerr << "Mem use after: " << mem2 << std::endl; \
+ } while (0) \
+
+
+
+
+
+
+
+
+#endif