X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.h;h=6d11d78de3e5c2fa624eeb937209f3c8a5f8b9e6;hb=f32808a35be7a1e62830a5972473178014fa44e5;hp=c2a0210b81d54401cab9968fcf0455fc93d3d63a;hpb=b443dd155b926655d84a3eadef5be09907d6c5eb;p=SXSI%2FXMLTree.git diff --git a/XMLTree.h b/XMLTree.h index c2a0210..6d11d78 100644 --- a/XMLTree.h +++ b/XMLTree.h @@ -20,23 +20,13 @@ #ifndef XMLTREE_H_ #define XMLTREE_H_ -extern "C" { -#define CAML_NAME_SPACE -#include -#include -#define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) - //#define XMLTREE(x) ((XMLTree*) (x)) -} + + #include #include #include #include "TextCollection/TextCollectionBuilder.h" -#include -#include -#include - - #undef W #undef WW #undef Wminusone @@ -50,7 +40,6 @@ using SXSI::TextCollection; using SXSI::TextCollectionBuilder; - // this constant is used to efficiently compute the child operation in the tree #define OPTD 10 @@ -96,7 +85,7 @@ typedef struct { typedef std::unordered_set TagIdSet; -typedef std::unordered_map TagIdMap; +typedef std::unordered_map TagIdMap; typedef TagIdMap::const_iterator TagIdMapIT; #define REGISTER_TAG(v,h,t) do { (h)->insert(std::make_pair((t),(v)->size()));\ @@ -123,7 +112,7 @@ class XMLTree { bp *Par; /** Mapping from tag identifer to tag name */ - vector *TagName; + std::vector *TagName; TagIdMap * tIdMap; /** Bit vector indicating with a 1 the positions of the non-empty texts. */ @@ -142,31 +131,46 @@ class XMLTree { FILE* stream; int stream_fd; - string buffer; + std::string * buffer; void myfputs(const char* s, FILE * fp){ - buffer.append(s); - if (buffer.size() >= 1000000){ - fputs(buffer.c_str(),fp); - buffer.clear(); + buffer->append(s); + if (buffer->size() >= 100000){ + fputs(buffer->c_str(),fp); + buffer->clear(); }; } void myfputc(const char c, FILE*fp){ - buffer.append(1,c); - if (buffer.size() >= 1000000){ - fputs(buffer.c_str(),fp); - buffer.clear(); + buffer->append(1,c); + if (buffer->size() >= 100000){ + fputs(buffer->c_str(),fp); + buffer->clear(); }; } void mybufferflush(FILE* fp){ - fputs(buffer.c_str(), fp); - buffer.clear(); + fputs(buffer->c_str(), fp); + buffer->clear(); } + + size_t myfprintf(const char* s, FILE * fp){ + if (s == NULL) + return 0; + size_t i = buffer->size(); + buffer->append(s); + size_t j = buffer->size(); + if (buffer->size() >= 100000){ + fputs(buffer->c_str(),fp); + buffer->clear(); + }; + return (j-i); + } + + void PrintNode(treeNode n, int fd); /** Data structure constructors */ - XMLTree(){;}; + XMLTree(){ buffer = 0;}; // non const pointer are freed by this method. - XMLTree( pb * const par, uint npar, vector * const TN, TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags, + XMLTree( pb * const par, uint npar, std::vector * const TN, TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags, TextCollection * const TC, bool dis_tc); public: @@ -256,7 +260,7 @@ public: * if none. */ treeNode FirstElement(treeNode x); - value CamlFirstElement(value x); + /** LastChild(x): returns the last child of node x. */ treeNode LastChild(treeNode x); @@ -269,7 +273,7 @@ public: * if none. */ treeNode NextElement(treeNode x); - value CamlNextElement(value x); + /** PrevSibling(x): returns the previous sibling of node x, assuming it * exists. */ @@ -458,7 +462,7 @@ public: uchar* GetText(DocID d) { uchar * s = Text->GetText(d); - return (s[0] == 1 ? (uchar*)"" : s); + return (s[0] == 1 ? (s+1) : s); } /** GetText(i, j): returns the texts corresponding to documents with @@ -496,8 +500,6 @@ public: }; -extern "C" value caml_cpp_fast_first_element(value xmltree, value node); -extern "C" value caml_cpp_fast_next_element(value xmltree, value node);