X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.h;h=4673ccc989142c89cf7b45ef14822a4d6f6ba644;hb=8b92ac7e539c796ee3160078b5ca30537f26ea51;hp=5c010a5e83875f2d8916db7304b1f3f9eacf4828;hpb=44c3b5aabb8782b15e66d7d14ab19b280d7eb20f;p=SXSI%2FXMLTree.git diff --git a/XMLTree.h b/XMLTree.h index 5c010a5..4673ccc 100644 --- a/XMLTree.h +++ b/XMLTree.h @@ -96,6 +96,8 @@ typedef TagIdMap::const_iterator TagIdMapIT; // Direct calls to sarray library +#define BUFFER_ALLOC (8192 * 2) +#define BUFFER_SIZE (BUFFER_ALLOC / 2) static inline int fast_find_close(bp *b,int s) { return fwd_excess(b,s,-1); @@ -158,7 +160,7 @@ class XMLTree { /** Bit vector indicating with a 1 the positions of the non-empty texts. */ static_bitsequence *EBVector; - + /** Tag sequence represented with a data structure for rank and select */ static_sequence *Tags; uint * tags_fix; @@ -169,59 +171,65 @@ class XMLTree { // Allows to disable the TextCollection for benchmarkin purposes bool disable_tc; + SXSI::TextCollectionBuilder::index_type_t text_index_type; - FILE* stream; - int stream_fd; - std::string * buffer; + std::string *buffer; + std::vector *print_stack; void _flush(int fd){ size_t size = buffer->size(); - size_t written = write(fd, buffer->data(), size); - if (written != size) - throw "Cannot flush buffer"; + if (size < BUFFER_SIZE) return; + size_t written; + while (1) { + written = write(fd, buffer->data(), size); + if ((written < 0) && (errno == EAGAIN || errno == EINTR)) + continue; + break; + }; buffer->clear(); } + void _dput_str(std::string s, int fd){ + buffer->append(s); + _flush(fd); + } + void _dputs(const char* s, int fd){ buffer->append(s); - if (buffer->size() >= 131072) _flush(fd); - + _flush(fd); } void _dputc(const char c, int fd){ - buffer->append(1,c); - if (buffer->size() >= 131072) _flush(fd); + buffer->push_back(c); } size_t _dprintf(const char* s, int fd){ if (s == NULL) return 0; - size_t i = 0; - while (1) { - switch (s[i]) { + size_t i; + char c; + for (i = 0; (c = s[i]); i++) { + switch (c) { + case '"': + _dputs(""", fd); + break; case '&': - buffer->append("&"); + _dputs("&", fd); break; case '\'': - buffer->append("'"); - break; - case '"': - buffer->append("""); + _dputs("'", fd); break; case '<': - buffer->append("<"); + _dputs("<", fd); break; case '>': - buffer->append(">"); + _dputs(">", fd); break; - case 0: - return i; default: - buffer->append(1, s[i]); + _dputc(c, fd); }; - if (buffer->size() >= 131072) _flush(fd); - ++i; }; + return i; } void PrintNode(treeNode n, int fd); @@ -229,8 +237,13 @@ class XMLTree { XMLTree(){ buffer = 0;}; // non const pointer are freed by this method. - XMLTree( pb * const par, uint npar, std::vector * const TN, TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags, - TextCollection * const TC, bool dis_tc); + XMLTree( pb * const par, + uint npar, + std::vector * const TN, + TagIdMap * const tim, uint *empty_texts_bmp, + TagType *tags, + TextCollection * const TC, bool dis_tc, + TextCollectionBuilder::index_type_t _index_type ); public: /** Data structure destructor */ @@ -329,6 +342,16 @@ public: else return parent(Par, x); }; + + treeNode BinaryParent(treeNode x){ + if (x <= Root()) + return NULLT; + else { + treeNode prev = x - 1; + return (fast_inspect(Par, prev) == OP) ? prev : find_open(Par, prev); + }; + }; + /* Assumes x is neither 0 nor -1 */ /** Child(x,i): returns the i-th child of node x, assuming it exists. */ @@ -337,14 +360,20 @@ public: /** LastChild(x): returns the last child of node x. */ - treeNode LastChild(treeNode x); + treeNode LastChild(treeNode x) { + NULLT_IF(x == NULLT || fast_isleaf(Par,x)); + return find_open(Par, fast_find_close(Par, x)-1); + } - - /** PrevSibling(x): returns the previous sibling of node x, assuming it * exists. */ - treeNode PrevSibling(treeNode x); + treeNode PrevSibling(treeNode x) + { + NULLT_IF(x==NULLT); + return prev_sibling(Par, x); + } + /** TaggedChild(x,tag): returns the first child of node x tagged tag, or * NULLT if there is none. Because of the balanced-parentheses representation @@ -577,11 +606,11 @@ public: } /** Save: saves XML tree data structure to file. */ - void Save(int fd, char *filename); + void Save(int fd ); /** Load: loads XML tree data structure from file. sample_rate_text * indicates the sample rate for the text search data structure. */ - static XMLTree *Load(int fd, char *filename, bool load_tc, int sample_factor); + static XMLTree *Load(int fd, bool load_tc, int sample_factor); void insertTag(TagType tag, uint position); @@ -597,6 +626,7 @@ public: /** Print procedure */ void Print(int fd,treeNode x, bool no_text); void Print(int fd,treeNode x) { Print(fd,x,false); } + void Flush(int fd){ _flush(fd); } // The following are inlined here for speed /** Tag(x): returns the tag identifier of node x. */