X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.h;h=6d11d78de3e5c2fa624eeb937209f3c8a5f8b9e6;hb=f32808a35be7a1e62830a5972473178014fa44e5;hp=00ad143f5e8e6075300cbb96ffebeb7436d1a8c5;hpb=b2df171c52f1e6d35a8b131299e4a7f494520333;p=SXSI%2FXMLTree.git diff --git a/XMLTree.h b/XMLTree.h index 00ad143..6d11d78 100644 --- a/XMLTree.h +++ b/XMLTree.h @@ -20,23 +20,13 @@ #ifndef XMLTREE_H_ #define XMLTREE_H_ -extern "C" { -#define CAML_NAME_SPACE -#include -#include -#define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) - //#define XMLTREE(x) ((XMLTree*) (x)) -} + + #include #include #include #include "TextCollection/TextCollectionBuilder.h" -#include -#include -#include - - #undef W #undef WW #undef Wminusone @@ -50,7 +40,6 @@ using SXSI::TextCollection; using SXSI::TextCollectionBuilder; - // this constant is used to efficiently compute the child operation in the tree #define OPTD 10 @@ -96,7 +85,7 @@ typedef struct { typedef std::unordered_set TagIdSet; -typedef std::unordered_map TagIdMap; +typedef std::unordered_map TagIdMap; typedef TagIdMap::const_iterator TagIdMapIT; #define REGISTER_TAG(v,h,t) do { (h)->insert(std::make_pair((t),(v)->size()));\ @@ -123,7 +112,7 @@ class XMLTree { bp *Par; /** Mapping from tag identifer to tag name */ - vector *TagName; + std::vector *TagName; TagIdMap * tIdMap; /** Bit vector indicating with a 1 the positions of the non-empty texts. */ @@ -141,13 +130,47 @@ class XMLTree { bool disable_tc; FILE* stream; - int stream_fd; + int stream_fd; + std::string * buffer; + void myfputs(const char* s, FILE * fp){ + buffer->append(s); + if (buffer->size() >= 100000){ + fputs(buffer->c_str(),fp); + buffer->clear(); + }; + } + void myfputc(const char c, FILE*fp){ + buffer->append(1,c); + if (buffer->size() >= 100000){ + fputs(buffer->c_str(),fp); + buffer->clear(); + }; + } + void mybufferflush(FILE* fp){ + fputs(buffer->c_str(), fp); + buffer->clear(); + } + + size_t myfprintf(const char* s, FILE * fp){ + if (s == NULL) + return 0; + size_t i = buffer->size(); + buffer->append(s); + size_t j = buffer->size(); + if (buffer->size() >= 100000){ + fputs(buffer->c_str(),fp); + buffer->clear(); + }; + return (j-i); + } + + void PrintNode(treeNode n, int fd); /** Data structure constructors */ - XMLTree(){;}; + XMLTree(){ buffer = 0;}; // non const pointer are freed by this method. - XMLTree( pb * const par, uint npar, vector * const TN, TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags, + XMLTree( pb * const par, uint npar, std::vector * const TN, TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags, TextCollection * const TC, bool dis_tc); public: @@ -237,7 +260,7 @@ public: * if none. */ treeNode FirstElement(treeNode x); - value CamlFirstElement(value x); + /** LastChild(x): returns the last child of node x. */ treeNode LastChild(treeNode x); @@ -250,7 +273,7 @@ public: * if none. */ treeNode NextElement(treeNode x); - value CamlNextElement(value x); + /** PrevSibling(x): returns the previous sibling of node x, assuming it * exists. */ @@ -439,7 +462,7 @@ public: uchar* GetText(DocID d) { uchar * s = Text->GetText(d); - return (s[0] == 1 ? (uchar*)"" : s); + return (s[0] == 1 ? (s+1) : s); } /** GetText(i, j): returns the texts corresponding to documents with @@ -477,8 +500,6 @@ public: }; -extern "C" value caml_cpp_fast_first_element(value xmltree, value node); -extern "C" value caml_cpp_fast_next_element(value xmltree, value node);