\r
#ifndef XMLTREE_H_\r
#define XMLTREE_H_\r
+\r
+\r
#include <unordered_set>\r
#include <unordered_map>\r
+#include <sstream>\r
#include "TextCollection/TextCollectionBuilder.h"\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <cstring>\r
-\r
\r
#undef W\r
#undef WW\r
#define PCDATA_TAG_ID 2\r
#define ATTRIBUTE_DATA_OPEN_TAG "<@$>"\r
#define ATTRIBUTE_DATA_TAG_ID 3\r
+#define CLOSING_TAG "</>"\r
+#define CLOSING_TAG_ID 4\r
#define DOCUMENT_CLOSE_TAG "/"\r
#define ATTRIBUTE_CLOSE_TAG "/<@>"\r
#define PCDATA_CLOSE_TAG "/<$>"\r
\r
\r
typedef std::unordered_set<int> TagIdSet;\r
-typedef std::unordered_map<string,int> TagIdMap;\r
+typedef std::unordered_map<std::string,int> TagIdMap;\r
typedef TagIdMap::const_iterator TagIdMapIT;\r
\r
#define REGISTER_TAG(v,h,t) do { (h)->insert(std::make_pair((t),(v)->size()));\\r
#define NULLT_IF(x) do { if (x) return NULLT; } while (0)\r
\r
\r
+\r
+\r
+\r
+\r
class XMLTreeBuilder;\r
\r
class XMLTree {\r
bp *Par;\r
\r
 /** Mapping from tag identifier to tag name */
- vector<string> *TagName;\r
+ std::vector<std::string> *TagName;\r
TagIdMap * tIdMap;\r
\r
/** Bit vector indicating with a 1 the positions of the non-empty texts. */\r
bool disable_tc;\r
\r
FILE* stream;\r
- int stream_fd;\r
+ int stream_fd; \r
+ std::string * buffer;\r
+ /** Appends the C string s to the pending output buffer and writes the
+  *  whole buffer to fp once it holds at least 100000 bytes.
+  *  A null s is ignored. The buffer member is dereferenced unconditionally,
+  *  so it must point to a valid string before this is called. */
+ void myfputs(const char* s, FILE * fp){
+   // Same guard as myfprintf: appending a null pointer to a std::string is undefined.
+   if (!s)
+     return;
+   buffer->append(s);
+   if (buffer->size() >= 100000){
+     // Write with an explicit length: no rescan for the terminator, and bytes
+     // following an embedded '\0' (which myfputc can add) are not dropped.
+     fwrite(buffer->data(), 1, buffer->size(), fp);
+     buffer->clear();
+   }
+ }
+ /** Appends the single character c to the pending output buffer and writes
+  *  the whole buffer to fp once it holds at least 100000 bytes.
+  *  The buffer member is dereferenced unconditionally, so it must point to
+  *  a valid string before this is called. */
+ void myfputc(const char c, FILE*fp){
+   buffer->append(1,c);
+   if (buffer->size() >= 100000){
+     // c may be '\0'; a length-based write keeps everything after it,
+     // whereas a C-string write would stop at the first terminator.
+     fwrite(buffer->data(), 1, buffer->size(), fp);
+     buffer->clear();
+   }
+ }
+ /** Writes everything currently held in the pending output buffer to fp and
+  *  empties the buffer, regardless of how much data it holds.
+  *  The buffer member is dereferenced unconditionally, so it must point to
+  *  a valid string before this is called. */
+ void mybufferflush(FILE* fp){
+   // Length-based write: emits the full buffer even if it contains '\0' bytes.
+   fwrite(buffer->data(), 1, buffer->size(), fp);
+   buffer->clear();
+ }
+\r
+ /** Appends the C string s to the pending output buffer and writes the
+  *  whole buffer to fp once it holds at least 100000 bytes.
+  *  @return the number of bytes appended for s (0 when s is null).
+  *  The buffer member is dereferenced unconditionally, so it must point to
+  *  a valid string before this is called. */
+ size_t myfprintf(const char* s, FILE * fp){
+   if (s == NULL)
+     return 0;
+   const size_t before = buffer->size();
+   buffer->append(s);
+   // Measure before a possible flush, which resets the buffer size to 0.
+   const size_t appended = buffer->size() - before;
+   if (buffer->size() >= 100000){
+     // Length-based write: no rescan for the terminator, and bytes after an
+     // embedded '\0' (which myfputc can add) are not dropped.
+     fwrite(buffer->data(), 1, buffer->size(), fp);
+     buffer->clear();
+   }
+   return appended;
+ }
+\r
+ void PrintNode(treeNode n, int fd);\r
/** Data structure constructors */\r
- XMLTree(){;};\r
+ /** Default constructor: only clears the output buffer pointer; no other member is set here. */
+ XMLTree() : buffer(0) {}
\r
 // Non-const pointers are freed by this method.
- XMLTree( pb * const par, uint npar, vector<string> * const TN, TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags,\r
+ XMLTree( pb * const par, uint npar, std::vector<std::string> * const TN, TagIdMap * const tim, uint *empty_texts_bmp, TagType *tags,\r
TextCollection * const TC, bool dis_tc);\r
\r
public: \r
~XMLTree();\r
\r
/** root(): returns the tree root. */\r
- treeNode Root();\r
+ treeNode Root() { return 0; } // The root is always reported as node 0.
\r
/** Size() : Number of parenthesis */\r
unsigned int Size(){\r
* if none.\r
*/\r
treeNode NextElement(treeNode x);\r
- \r
+\r
/** PrevSibling(x): returns the previous sibling of node x, assuming it \r
* exists. */\r
\r
 uchar* GetText(DocID d) { // Fetches the text of document d from the Text collection.
 
 uchar * s = Text->GetText(d);
- return (s[0] == 1 ? (uchar*)"" : s);
+ return (s[0] == 1 ? (s+1) : s); // Skips a leading byte of value 1. NOTE(review): presumably a marker for an empty text - confirm.
 }
\r
/** GetText(i, j): returns the texts corresponding to documents with\r
\r
\r
/** Print procedure */\r
- void Print(int fd,treeNode x);\r
+ void Print(int fd,treeNode x, bool no_text);\r
+ void Print(int fd,treeNode x) { Print(fd,x,false); } // Two-argument form: forwards to the three-argument Print with no_text = false.
\r
};\r
+\r
+\r
+\r
+\r
#endif\r
\r