From 1c40b498ddd6d66b09aff3a22b9f7ddd845250dc Mon Sep 17 00:00:00 2001 From: kim Date: Mon, 12 Apr 2010 01:34:32 +0000 Subject: [PATCH] . git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@779 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- XMLTree.h | 32 ++++++-- XMLTreeBuilder.cpp | 31 +++++--- XMLTreeBuilder.h | 10 +-- bp.c | 7 +- bp.h | 2 +- darray.c | 35 ++++++++- libcds/Makefile | 2 +- libcds/src/Makefile | 3 +- libcds/src/basics.h | 14 ++++ libcds/src/static_bitsequence/sdarray.cpp | 58 ++++++++++++--- .../static_sequence/static_sequence_bs.cpp | 12 ++- timings.h | 73 +++++++++++++++++++ 12 files changed, 240 insertions(+), 39 deletions(-) create mode 100644 timings.h diff --git a/XMLTree.h b/XMLTree.h index a3ac2ed..1216562 100644 --- a/XMLTree.h +++ b/XMLTree.h @@ -20,11 +20,20 @@ #ifndef XMLTREE_H_ #define XMLTREE_H_ +extern "C" { +#define CAML_NAME_SPACE +#include +#include +#define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) + //#define XMLTREE(x) ((XMLTree*) (x)) +} #include #include +#include #include "TextCollection/TextCollectionBuilder.h" -#include -#include + +#include +#include #include @@ -41,6 +50,7 @@ using SXSI::TextCollection; using SXSI::TextCollectionBuilder; + // this constant is used to efficiently compute the child operation in the tree #define OPTD 10 @@ -77,6 +87,8 @@ typedef struct { #define PCDATA_TAG_ID 2 #define ATTRIBUTE_DATA_OPEN_TAG "<@$>" #define ATTRIBUTE_DATA_TAG_ID 3 +#define CLOSING_TAG "" +#define CLOSING_TAG_ID 4 #define DOCUMENT_CLOSE_TAG "/" #define ATTRIBUTE_CLOSE_TAG "/<@>" #define PCDATA_CLOSE_TAG "/<$>" @@ -95,6 +107,10 @@ typedef TagIdMap::const_iterator TagIdMapIT; #define NULLT_IF(x) do { if (x) return NULLT; } while (0) + + + + class XMLTreeBuilder; class XMLTree { @@ -139,7 +155,7 @@ public: ~XMLTree(); /** root(): returns the tree root. */ - treeNode Root(); + treeNode Root() { return 0; } /** Size() : Number of parenthesis */ unsigned int Size(){ @@ -221,7 +237,7 @@ public: * if none. */ treeNode FirstElement(treeNode x); - + value CamlFirstElement(value x); /** LastChild(x): returns the last child of node x. */ treeNode LastChild(treeNode x); @@ -234,7 +250,7 @@ public: * if none. */ treeNode NextElement(treeNode x); - + value CamlNextElement(value x); /** PrevSibling(x): returns the previous sibling of node x, assuming it * exists. */ @@ -459,5 +475,11 @@ public: void Print(int fd,treeNode x); }; + +extern "C" value caml_cpp_fast_first_element(value xmltree, value node); +extern "C" value caml_cpp_fast_next_element(value xmltree, value node); + + + #endif diff --git a/XMLTreeBuilder.cpp b/XMLTreeBuilder.cpp index 9c0fc6a..595ca79 100644 --- a/XMLTreeBuilder.cpp +++ b/XMLTreeBuilder.cpp @@ -1,6 +1,6 @@ #include "basics.h" #include "XMLTreeBuilder.h" - +#include "timings.h" XMLTreeBuilder::~XMLTreeBuilder(){ @@ -14,7 +14,8 @@ int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dt npar = 0; parArraySize = 1; disable_tc = dtc; - + + STARTTIMER(); par_aux = (pb *)umalloc(sizeof(pb)*parArraySize); @@ -27,6 +28,7 @@ int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dt REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_OPEN_TAG); REGISTER_TAG(TagName,tIdMap,PCDATA_OPEN_TAG); REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_OPEN_TAG); + REGISTER_TAG(TagName,tIdMap,CLOSING_TAG); REGISTER_TAG(TagName,tIdMap,DOCUMENT_CLOSE_TAG); REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_CLOSE_TAG); REGISTER_TAG(TagName,tIdMap,PCDATA_CLOSE_TAG); @@ -55,12 +57,19 @@ XMLTree *XMLTreeBuilder::CloseDocument() //npar++; // makes the text collection static + STOPTIMER(Parsing); + PRINTTIME("Parsing XML Document", Parsing); + if (!disable_tc) { assert(Text == 0); assert(TextBuilder != 0); + STARTTIMER(); Text = TextBuilder->InitTextCollection(); delete TextBuilder; TextBuilder = 0; + STOPTIMER(Building); + PRINTTIME("Building TextCollection", Building); + } XMLTree *T = new XMLTree(par_aux, @@ -128,20 +137,20 @@ int XMLTreeBuilder::NewClosingTag(string tagname) setbit(par_aux,npar,CP); // marks a new closing parenthesis - tagname.insert(0,"/"); + //tagname.insert(0,"/"); - TagIdMapIT tag_id = tIdMap->find(tagname); + //TagIdMapIT tag_id = tIdMap->find(tagname); - if (tag_id == tIdMap->end()){ - REGISTER_TAG(TagName,tIdMap,tagname); - i = TagName->size() - 1; - } - else - i = tag_id->second; + // if (tag_id == tIdMap->end()){ + // REGISTER_TAG(TagName,tIdMap,tagname); + // i = TagName->size() - 1; + // } + // else + // i = tag_id->second; tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1)); - tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags + tags_aux[npar] = CLOSING_TAG_ID; // inserts the new tag id within the preorder sequence of tags npar++; diff --git a/XMLTreeBuilder.h b/XMLTreeBuilder.h index 336ca2d..7dc08a1 100644 --- a/XMLTreeBuilder.h +++ b/XMLTreeBuilder.h @@ -21,13 +21,12 @@ #ifndef XMLTREEBUILDER_H_ #define XMLTREEBUILDER_H_ + +#include +#include +#include #include #include "TextCollection/TextCollectionBuilder.h" -#include -#include -#include - - #undef W #undef WW #undef Wminusone @@ -38,6 +37,7 @@ #include #include #include + using SXSI::TextCollection; using SXSI::TextCollectionBuilder; diff --git a/bp.c b/bp.c index c640816..6b7e296 100644 --- a/bp.c +++ b/bp.c @@ -4,7 +4,12 @@ #define RANDOM int msize=0; -#define mymalloc(p,n,f) {p =(__typeof__(p)) malloc((n)*sizeof(*p)); msize += (f)*(n)*sizeof(*p); /* if (f) printf("malloc %d bytes at line %d total %d\n",(n)*sizeof(*p),__LINE__,msize); */ if ((p)==NULL) {printf("not enough memory (%d bytes) in line %d\n",msize,__LINE__); exit(1);};} +#define mymalloc(p,n,f) { \ + p = (__typeof__(p)) malloc((n)*sizeof(*p)); \ +if ((p)==NULL) {printf("not enough memory (%d bytes) in line %d\n",msize,__LINE__); \ + exit(1);}; \ +msize += (f)*(n)*sizeof(*p); \ +;} int postorder_select_bsearch(bp *b,int s); diff --git a/bp.h b/bp.h index 9bfc291..c4e05e4 100644 --- a/bp.h +++ b/bp.h @@ -33,7 +33,7 @@ //#define logMB 3 #define MB (1<> logD; + l = i & (D-1); + B[j] &= (~(1<<(D-1-l))); + return 0; +} + +int setbit_one(pb *B, int i) +{ + int j,l; + j = i >> logD; + l = i & (D-1); + B[j] |= (1<<(D-1-l)); + return 1; + +} + + + int setbits(pb *B, int i, int d, int x) { int j; @@ -113,7 +134,7 @@ static const unsigned int popCount[] = { static int selecttbl[8*256]; -unsigned int popcount(pb x) +unsigned int popcount_old(pb x) { pb r; #if 0 @@ -142,6 +163,18 @@ unsigned int popcount(pb x) return r; } +inline unsigned int +popcount(pb x) +{ + uint m1 = 0x55555555; + uint m2 = 0xc30c30c3; + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2) + ((x >> 4) & m2); + x += x >> 6; + return (x + (x >> 12) + (x >> 24)) & 0x3f; +} + + unsigned int popcount8(pb x) { dword r; diff --git a/libcds/Makefile b/libcds/Makefile index b5f4fa3..b6a856c 100644 --- a/libcds/Makefile +++ b/libcds/Makefile @@ -9,7 +9,7 @@ doc: libcompact: @echo " [MSG] Entering directory src" @make --no-print-directory -C src - + tests: libcompact @echo " [MSG] Entering directory tests" @make --no-print-directory -C tests diff --git a/libcds/src/Makefile b/libcds/src/Makefile index d4ccf7c..aaec1e9 100644 --- a/libcds/src/Makefile +++ b/libcds/src/Makefile @@ -1,7 +1,6 @@ CPP=g++ -#CPPFLAGS=-g3 -Wall -O0 -CPPFLAGS=-O9 -Wall -DNDEBUG +CPPFLAGS=-O3 -Wall -DNDEBUG -fno-PIC INCL=-I../includes/ diff --git a/libcds/src/basics.h b/libcds/src/basics.h index 4e00cae..3376902 100644 --- a/libcds/src/basics.h +++ b/libcds/src/basics.h @@ -233,6 +233,20 @@ inline uint popcount(int x){ return __popcount_tab[(x >> 0) & 0xff] + __popcount_tab[(x >> 8) & 0xff] + __popcount_tab[(x >> 16) & 0xff] + __popcount_tab[(x >> 24) & 0xff]; } +inline unsigned int +fast_popcount(int x) +{ + uint m1 = 0x55555555; + uint m2 = 0x33333333; + uint m4 = 0x0f0f0f0f; + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + x += x >> 8; + return (x + (x >> 16)) & 0x3f; +} + + /** Counts the number of 1s in the first 16 bits of x */ inline uint popcount16(int x){ diff --git a/libcds/src/static_bitsequence/sdarray.cpp b/libcds/src/static_bitsequence/sdarray.cpp index 2294a0c..258162a 100644 --- a/libcds/src/static_bitsequence/sdarray.cpp +++ b/libcds/src/static_bitsequence/sdarray.cpp @@ -157,6 +157,34 @@ static const unsigned int _popCount[] = { 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, 4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8 }; +static inline unsigned int +_fast_popcount2(int x) +{ + uint m1 = 0x55555555; + uint m2 = 0x33333333; + uint m4 = 0x0f0f0f0f; + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + x += x >> 8; + return (x + (x >> 16)) & 0x3f; +} + +static inline unsigned int +_fast_popcount3(int x) +{ + uint m1 = 0x55555555; + uint m2 = 0xc30c30c3; + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2) + ((x >> 4) & m2); + x += x >> 6; + return (x + (x >> 12) + (x >> 24)) & 0x3f; +} + +static inline unsigned int +_fast_popcount(int x) { + return _popCount[x]; +} static unsigned int __selecttbl[8*256]; static int built = 0; @@ -395,11 +423,13 @@ int selectd2_select(selectd2 *select, int i,int f) { if (f == 1) { rr = p & (8-1); - r -= _popCount[*q >> (8-1-rr)]; + //r -= _popCount[*q >> (8-1-rr)]; + r -= _fast_popcount(*q >> (8-1-rr)); //p = p - rr; while (1) { - rr = _popCount[*q]; + //rr = _popCount[*q]; + rr = _fast_popcount(*q); if (r + rr >= i) break; r += rr; //p += 8; @@ -410,11 +440,13 @@ int selectd2_select(selectd2 *select, int i,int f) { } else { rr = p & (8-1); - r -= _popCount[(*q ^ 0xff) >> (8-1-rr)]; + //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)]; + r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr)); //p = p - rr; while (1) { - rr = _popCount[*q ^ 0xff]; + //rr = _popCount[*q ^ 0xff]; + rr = _fast_popcount(*q ^ 0xff); if (r + rr >= i) break; r += rr; //p += 8; @@ -471,11 +503,13 @@ int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) { if (f == 1) { rr = p & (8-1); - r -= _popCount[*q >> (8-1-rr)]; + //r -= _popCount[*q >> (8-1-rr)]; + r -= _fast_popcount(*q >> (8-1-rr)); //p = p - rr; while (1) { - rr = _popCount[*q]; + //rr = _popCount[*q]; + rr = _fast_popcount(*q); if (r + rr >= i) break; r += rr; //p += 8; @@ -487,7 +521,8 @@ int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) { if ((i>>logL) == ((i+1)>>logL)) { i++; while (1) { - rr = _popCount[*q]; + //rr = _popCount[*q]; + r = _fast_popcount(*q); if (r + rr >= i) break; r += rr; q++; @@ -502,11 +537,13 @@ int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) { } else { rr = p & (8-1); - r -= _popCount[(*q ^ 0xff) >> (8-1-rr)]; + //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)]; + r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr)); //p = p - rr; while (1) { - rr = _popCount[*q ^ 0xff]; + //rr = _popCount[*q ^ 0xff]; + rr = _fast_popcount(*q ^ 0xff); if (r + rr >= i) break; r += rr; //p += 8; @@ -518,7 +555,8 @@ int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) { if ((i>>logL) == ((i+1)>>logL)) { i++; while (1) { - rr = _popCount[*q ^ 0xff]; + //rr = _popCount[*q ^ 0xff]; + rr = _fast_popcount(*q ^ 0xff); if (r + rr >= i) break; r += rr; q++; diff --git a/libcds/src/static_sequence/static_sequence_bs.cpp b/libcds/src/static_sequence/static_sequence_bs.cpp index 283b89c..83f18c7 100644 --- a/libcds/src/static_sequence/static_sequence_bs.cpp +++ b/libcds/src/static_sequence/static_sequence_bs.cpp @@ -52,7 +52,7 @@ uint static_sequence_bs::rank(uint c, uint i) { if(am->map(c)>=sigma) return (uint)-1; return bitmaps[am->map(c)]->rank1(i); } - +/* uint static_sequence_bs::select(uint c, uint i) { if(am->map(c)>=sigma) return (uint)-1; return bitmaps[am->map(c)]->select1(i); @@ -62,7 +62,15 @@ uint static_sequence_bs::select_next(uint c, uint i) { if(am->map(c)>=sigma) return (uint)-1; return bitmaps[am->map(c)]->select_next1(i); } - +*/ +uint static_sequence_bs::select(uint c, uint i) { + if(c>=sigma) return (uint)-1; + return bitmaps[c]->select1(i); +} +uint static_sequence_bs::select_next(uint c, uint i) { + if(c>=sigma) return (uint)-1; + return bitmaps[c]->select_next1(i); +} uint static_sequence_bs::access(uint i) { for(uint j=0;jaccess(i)) return am->unmap(j); diff --git a/timings.h b/timings.h new file mode 100644 index 0000000..684d4e9 --- /dev/null +++ b/timings.h @@ -0,0 +1,73 @@ +#ifndef TIMINGS_H_ +#define TIMINGS_H_ +//Timing Macro's +#include +#include +#include +#include +#include + +static double tParsing = 0; +static unsigned int cParsing = 0; + +static double tLoading = 0; +static unsigned int cLoading = 0; +static double tBuilding = 0; +static unsigned int cBuilding = 0; + +static struct timeval tmpv1; +static struct timeval tmpv2; +static string mem1; +static string mem2; + +static void read_procmem(string& memstr) { + std::string buf; + pid_t pid = getpid(); + std::stringstream path; + path << "/proc/" << pid << "/status"; + std::ifstream infile; + infile.open (path.str().c_str(), std::ifstream::in); + while (infile.good()){ + getline(infile,buf); + if (infile.eof()) { + memstr = "Could not read memory"; + return; + }; + int idx = buf.find("VmRSS"); + if (idx == 0){ + memstr = buf; + return; + }; + }; + memstr = "Could not read memory"; + return; + +} + +#define STARTTIMER() do { \ + read_procmem(mem1); \ + gettimeofday(&tmpv1,NULL); \ + } while (0) \ + +#define STOPTIMER(x) do { \ + gettimeofday(&tmpv2,NULL); \ + read_procmem(mem2); \ + (t##x) = ((tmpv2.tv_sec - tmpv1.tv_sec) * 1000000.0 + \ + (tmpv2.tv_usec - tmpv1.tv_usec))/1000.0; \ + (c##x)= (c##x)+1; \ + } while (0) + +#define PRINTTIME(s,x) do { \ + std::cerr << (s) << " : " << (t##x) << "ms" << std::endl; \ + std::cerr << "Mem use before: " << mem1 << std::endl; \ + std::cerr << "Mem use after: " << mem2 << std::endl; \ + } while (0) \ + + + + + + + + +#endif -- 2.17.1