Remove spurious printfs.
[SXSI/XMLTree.git] / xml-tree-builder.cpp
1 #include "xml-tree-builder.hpp"
2 #include <stdexcept>
3 #include <cstdio>
4 #include <utility>
5
6 using namespace SXSI;
7
8 xml_tree_builder::xml_tree_builder()
9 {
10   opened = false;
11   par = 0;
12   tags = 0;
13   tag_ids = 0;
14   text_positions = 0;
15   disable_text_index = false;
16   tc_builder = 0;
17 }
18
19 xml_tree_builder::~xml_tree_builder()
20 {
21   if (opened) reset();
22 }
23
24 void xml_tree_builder::reset()
25 {
26   delete par;
27   delete tags;
28   delete tag_ids;
29   if (!disable_text_index){
30     delete tc_builder;
31     delete text_positions;
32   };
33 }
34
35
36 int32_t xml_tree_builder::register_tag(std::string tag, int32_t id)
37 {
38   auto found = tag_ids->find(tag);
39
40   if (found == tag_ids->end()) {
41     if (id != current_tag)
42       throw std::runtime_error("xml-tree-builder: inconsistant tag id");
43
44     tag_ids->insert(std::make_pair(tag, id));
45     current_tag++;
46     return id;
47   } else
48     return found->second;
49
50 }
51
52 int32_t xml_tree_builder::register_tag(std::string tag)
53 {
54   return register_tag(tag, current_tag);
55 }
56
57 void
58 xml_tree_builder::open_document(bool disable_text_index,
59                                 unsigned int sample_rate,
60                                 TextCollectionBuilder::index_type_t idx_type)
61 {
62   if (opened) reset();
63   opened = true;
64   par = new bit_vector();
65   tags = new std::vector<int32_t>();
66   current_tag = 0;
67   tag_ids = new std::unordered_map<std::string, int32_t>();
68
69   register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
70   register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
71
72   register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
73   register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
74
75   register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
76   register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
77
78   register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
79                xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
80
81   register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
82                xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
83
84   register_tag(xml_tree::CLOSE_TAG,
85                xml_tree::CLOSE_TAG_ID);
86
87   this->disable_text_index = disable_text_index;
88   if (!disable_text_index){
89     tc_builder = TextCollectionBuilder::create(sample_rate, idx_type);
90     text_positions = new bit_vector();
91     text_index_type = idx_type;
92   };
93 }
94
95 void xml_tree_builder::open_tag(std::string tag)
96 {
97   int32_t id = register_tag(tag);
98   tags->push_back(id);
99   par->push_back(true);
100   if (!disable_text_index) text_positions->set_le(text_positions->size(), false);
101 }
102
103 void xml_tree_builder::close_tag(std::string)
104 {
105   xml_tree::tag_t t = xml_tree::CLOSE_TAG_ID;
106   tags->push_back(t);
107   par->push_back(false);
108   if (!disable_text_index) text_positions->set_le(text_positions->size(), false);
109 }
110
111 void xml_tree_builder::text(std::string s)
112 {
113   if (!disable_text_index){
114     if (s.empty()) s = "\001";
115     tc_builder->InsertText((const unsigned char *) s.c_str());
116     text_positions->set_le(text_positions->size() - 1, true);
117   }
118 }
119
120 xml_tree *xml_tree_builder::close_document()
121 {
122   if (opened) {
123     opened = false;
124     auto tags_ = tags;
125     auto tag_ids_ = tag_ids;
126     auto par_ = par;
127     auto tc_builder_ = tc_builder;
128     auto text_positions_ = text_positions;
129     tc_builder = 0;
130     text_positions = 0;
131     tags = 0;
132     tag_ids = 0;
133     par = 0;
134     return new xml_tree(tags_, tag_ids_, par_,
135                         disable_text_index,
136                         tc_builder_,
137                         text_index_type,
138                         text_positions_);
139   };
140
141   throw std::runtime_error("xml_tree_builder: inconsistent parser state");
142 }
143