Documentation step 2: Properly document node test functions.
[SXSI/XMLTree.git] / xml-tree-builder.cpp
1 #include "xml-tree-builder.hpp"
2 #include <stdexcept>
3 #include <cstdio>
4 #include <utility>
5
6 using namespace SXSI;
7
8 xml_tree_builder::xml_tree_builder()
9 {
10   opened = false;
11   par = 0;
12   tags = 0;
13   tag_ids = 0;
14   text_positions = 0;
15   disable_text_index = false;
16   tc_builder = 0;
17 }
18
19 xml_tree_builder::~xml_tree_builder()
20 {
21   if (opened) reset();
22 }
23
24 void xml_tree_builder::reset()
25 {
26   delete par;
27   delete tags;
28   delete tag_ids;
29   if (!disable_text_index){
30     delete tc_builder;
31     delete text_positions;
32   };
33 }
34
35
36 int32_t xml_tree_builder::register_tag(std::string tag, int32_t id)
37 {
38   auto found = tag_ids->find(tag);
39
40   if (found == tag_ids->end()) {
41     if (id != current_tag)
42       throw std::runtime_error("xml-tree-builder: inconsistant tag id");
43
44     tag_ids->insert(std::make_pair(tag, id));
45     current_tag++;
46     return id;
47   } else
48     return found->second;
49
50 }
51
52 int32_t xml_tree_builder::register_tag(std::string tag)
53 {
54   return register_tag(tag, current_tag);
55 }
56
57 void
58 xml_tree_builder::open_document(bool disable_text_index,
59                                 unsigned int sample_rate,
60                                 TextCollectionBuilder::index_type_t idx_type)
61 {
62   if (opened) reset();
63   opened = true;
64   par = new bit_vector();
65   tags = new std::vector<int32_t>();
66   current_tag = 0;
67   tag_ids = new std::unordered_map<std::string, int32_t>();
68
69   register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
70   register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
71
72   register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
73   register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
74
75   register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
76   register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
77
78   register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
79                xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
80
81   register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
82                xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
83
84   register_tag(xml_tree::CLOSE_TAG,
85                xml_tree::CLOSE_TAG_ID);
86
87   this->disable_text_index = disable_text_index;
88   if (!disable_text_index){
89     fprintf(stderr, "Sample rate is %u\n", sample_rate);
90     tc_builder = TextCollectionBuilder::create(sample_rate, idx_type);
91     text_positions = new bit_vector();
92     text_index_type = idx_type;
93   };
94 }
95
96 void xml_tree_builder::open_tag(std::string tag)
97 {
98   int32_t id = register_tag(tag);
99   tags->push_back(id);
100   par->push_back(true);
101   if (!disable_text_index) text_positions->set_le(text_positions->size(), false);
102 }
103
104 void xml_tree_builder::close_tag(std::string)
105 {
106   xml_tree::tag_t t = xml_tree::CLOSE_TAG_ID;
107   tags->push_back(t);
108   par->push_back(false);
109   if (!disable_text_index) text_positions->set_le(text_positions->size(), false);
110 }
111
112 void xml_tree_builder::text(std::string s)
113 {
114   if (!disable_text_index){
115     if (s.empty()) s = "\001";
116     tc_builder->InsertText((const unsigned char *) s.c_str());
117     text_positions->set_le(text_positions->size() - 1, true);
118   }
119 }
120
121 xml_tree *xml_tree_builder::close_document()
122 {
123   if (opened) {
124     opened = false;
125     auto tags_ = tags;
126     auto tag_ids_ = tag_ids;
127     auto par_ = par;
128     auto tc_builder_ = tc_builder;
129     auto text_positions_ = text_positions;
130     tc_builder = 0;
131     text_positions = 0;
132     tags = 0;
133     tag_ids = 0;
134     par = 0;
135     return new xml_tree(tags_, tag_ids_, par_,
136                         disable_text_index,
137                         tc_builder_,
138                         text_index_type,
139                         text_positions_);
140   };
141
142   throw std::runtime_error("xml_tree_builder: inconsistent parser state");
143 }
144