Huge refactoring to remove Diego's C/C++ chimera code.
[SXSI/XMLTree.git] / xml-tree-builder.hpp
diff --git a/xml-tree-builder.hpp b/xml-tree-builder.hpp
new file mode 100644 (file)
index 0000000..7550824
--- /dev/null
@@ -0,0 +1,48 @@
+#ifndef XML_TREE_BUILDER_HPP_
+#define XML_TREE_BUILDER_HPP_
+
+#include <cstdint>
+#include <unordered_map>
+#include "xml-tree.hpp"
+#include "bit-vector.hpp"
+#undef W
+#undef WW
+#undef Wminusone
+#include <TextCollection/TextCollectionBuilder.h>
+
+class xml_tree_builder {  // Builder exposing document/tag/text events; close_document() returns an xml_tree*.
+  // NOTE(review): expected call order (open_document, tag/text events, close_document) is inferred from names -- confirm in the implementation.
+public:
+  xml_tree_builder();
+  ~xml_tree_builder();  // NOTE(review): user-declared dtor plus raw pointer members, no copy control declared; copying is presumably unsafe -- confirm.
+  void open_document(bool disable_text_index,  // presumably turns off text-index construction -- confirm
+                     unsigned int sample_rate,  // presumably forwarded to the text collection builder -- confirm
+                     SXSI::TextCollectionBuilder::index_type_t idx_type);  // kind of text index requested
+  // NOTE(review): ownership of the xml_tree* returned below is not visible in this header -- confirm who deletes it.
+  xml_tree *close_document();
+  void open_tag(std::string);   // argument is presumably the tag name -- confirm
+  void close_tag(std::string);  // argument is presumably the tag name -- confirm
+  void text(std::string);       // argument is presumably the text content -- confirm
+  // NOTE(review): std::string and std::vector are used in this class, but <string>/<vector> are not included directly by this header.
+private:
+  void reset();
+  int32_t register_tag(std::string);           // returns an int32_t, presumably the id assigned to the tag -- confirm
+  int32_t register_tag(std::string, int32_t);  // overload taking an int32_t, presumably a caller-chosen id -- confirm
+
+  // Tag/tree state. NOTE(review): the roles noted below are inferred from names and types only -- confirm.
+  bit_vector *par;  // presumably the parenthesis encoding of the tree shape
+  std::vector<int32_t> *tags;  // sequence of int32_t values, presumably tag ids
+  int32_t current_tag;
+  std::unordered_map<std::string, int32_t> *tag_ids;  // std::string -> int32_t map, presumably tag name -> id
+  bool opened;
+
+  // Text state. NOTE(review): grouping and roles are inferred from names and types only -- confirm.
+  bit_vector *text_positions;
+  SXSI::TextCollectionBuilder *tc_builder;
+  bool disable_text_index;  // same name as open_document()'s first parameter; presumably stores it
+  SXSI::TextCollectionBuilder::index_type_t text_index_type;  // same type as open_document()'s idx_type; presumably stores it
+
+};
+
+
+#endif