6 #include <unordered_map>
8 #include <bp/bp-darray.h>
9 #include <libcds/includes/basics.h>
10 #include <libcds/includes/static_bitsequence.h>
11 #include <libcds/includes/alphabet_mapper.h>
12 #include <libcds/includes/static_sequence.h>
13 #include "bit-vector.hpp"
19 #include <TextCollection/TextCollection.h>
20 #include <TextCollection/TextCollectionBuilder.h>
22 class xml_tree_builder;
26 typedef int32_t node_t;
27 typedef int32_t tag_t;
29 static const node_t NIL = -1;
30 static const node_t ROOT = 0;
32 static const char* NIL_TAG;
33 static const tag_t NIL_TAG_ID = -1;
34 static const char* DOCUMENT_OPEN_TAG;
35 static const tag_t DOCUMENT_OPEN_TAG_ID = 0;
36 static const char* ATTRIBUTE_OPEN_TAG;
37 static const tag_t ATTRIBUTE_OPEN_TAG_ID = 1;
38 static const char* PCDATA_OPEN_TAG;
39 static const tag_t PCDATA_OPEN_TAG_ID = 2;
40 static const char* ATTRIBUTE_DATA_OPEN_TAG;
41 static const tag_t ATTRIBUTE_DATA_OPEN_TAG_ID = 3;
42 static const char* CLOSE_TAG;
43 static const tag_t CLOSE_TAG_ID = 4;
49 inline uint32_t size() const;
50 inline uint32_t num_tags() const;
51 inline uint32_t subtree_size(node_t) const;
52 inline uint32_t subtree_tags(node_t, tag_t) const;
53 uint32_t subtree_elements(node_t) const;
54 uint32_t num_children(node_t) const;
55 uint32_t child_pos(node_t) const;
59 inline bool is_leaf(node_t) const;
60 inline bool is_ancestor(node_t, node_t) const;
61 inline bool is_right_descendant(node_t, node_t) const;
62 bool is_child(node_t, node_t) const;
63 inline bool is_first_child(node_t) const;
64 inline bool is_nil(node_t) const;
65 inline bool is_open(node_t) const;
67 uint32_t depth(node_t) const;
68 uint32_t preorder(node_t) const;
69 uint32_t postorder(node_t) const;
72 inline tag_t tag(node_t) const;
73 const char* get_tag_name_by_ref(tag_t) const;
74 tag_t register_tag(char *s);
76 //Navigation functions
77 inline node_t root () const;
78 node_t child(node_t, uint32_t) const;
79 inline node_t parent(node_t) const;
80 inline node_t first_child(node_t) const;
81 inline node_t last_child(node_t) const;
82 inline node_t next_sibling(node_t) const;
83 inline node_t prev_sibling(node_t) const;
84 inline node_t first_element(node_t) const;
85 inline node_t next_element(node_t) const;
86 inline node_t tagged_next(node_t, tag_t) const;
87 inline node_t tagged_descendant(node_t, tag_t) const;
88 inline node_t tagged_following_before(node_t, tag_t, node_t) const;
89 inline node_t tagged_child(node_t, tag_t) const;
90 inline node_t tagged_sibling(node_t, tag_t) const;
91 node_t select_child(node_t, tag_t*) const;
92 node_t select_descendant(node_t, tag_t*) const;
93 node_t select_sibling(node_t, tag_t*) const;
94 node_t select_following_before (node_t, tag_t*, node_t) const;
95 inline node_t closing(node_t) const;
98 inline node_t parent_node(int32_t) const;
99 inline SXSI::TextCollection *get_text_collection() const;
100 std::pair<int32_t, int32_t> text_id_range(node_t) const;
101 int32_t text_id(node_t) const;
102 unsigned char* get_text(int32_t) const;
104 SXSI::TextCollection::document_result prefix(uchar const *s) const;
105 SXSI::TextCollection::document_result suffix(uchar const *s) const;
106 SXSI::TextCollection::document_result equals(uchar const *s) const;
107 SXSI::TextCollection::document_result contains(uchar const *s) const;
108 SXSI::TextCollection::document_result less_than(uchar const *s) const;
111 void save(int, char*);
112 static xml_tree* load(int, char*, bool, int);
113 void print(node_t, int, bool no_text=false);
117 friend class xml_tree_builder;
119 xml_tree(std::vector<int32_t>*,
120 std::unordered_map<std::string, int32_t>*,
123 SXSI::TextCollectionBuilder *,
124 SXSI::TextCollectionBuilder::index_type_t,
127 //Parenthesis sequence
130 static_sequence *tags;
132 uint32_t tag_seq_len;
133 uint32_t bits_per_tag;
134 //Mapping from tag_t identifiers to/from tagnames
135 std::vector<std::string> *tag_names;
136 std::unordered_map<std::string, tag_t> *tag_ids;
138 SXSI::TextCollection *text_collection;
139 static_bitsequence *text_positions;
140 SXSI::TextCollectionBuilder::index_type_t text_index_type;
141 //auxiliary data structures for efficient printing.
142 std::vector<std::string> *print_stack;
143 std::string *print_buffer;
145 #define BUFFER_SIZE (8192*2)
148 void uflush_r(int, size_t);
149 void uput_str(std::string s, int fd);
150 void uputs(const char* s, int fd);
151 void uputc(const char c, int fd);
152 size_t uprintf(const char*s, int fd);
155 #define XML_TREE_INTERNAL__ 1
156 #include "xml-tree-inc.hpp"
158 #endif //XML_TREE_HPP_