--- /dev/null
+FLAGS = -O9 -I./libcds/includes/\r
+\r
+OBJECTS=libcds/lib/libcds.a\r
+\r
+all: text_collection XMLTree\r
+\r
+XMLTree: XMLTree.o bp.o darray.o bpcore.o libcds/lib/libcds.a\r
+ cp libcds/lib/libcds.a XMLTree.a\r
+ ar vrcs XMLTree.a XMLTree.o bp.o darray.o bpcore.o\r
+\r
+\r
+XMLTree.o: bp.c bp.h bpcore.c XMLTree.cpp XMLTree.h\r
+ g++ $(FLAGS) -c XMLTree.cpp\r
+\r
+bp.o: bp.c bpcore.c darray.c bp.h darray.h\r
+ g++ $(FLAGS) -c bp.c\r
+\r
+darray.o: darray.c darray.h bpcore.c\r
+ g++ $(FLAGS) -c darray.c\r
+\r
+bpcore.o: bpcore.c\r
+ g++ $(FLAGS) -c bpcore.c\r
+\r
+text_collection:\r
+ make -C TextCollection/\r
+\r
+clean:\r
+ rm -f *.o
\ No newline at end of file
--- /dev/null
+\r
+#include "XMLTree.h"\r
+#include <cstring>\r
+// functions to convert tag positions to the corresponding tree node and viceversa. \r
+// These are implemented in order to be able to change the tree and Tags representations, \r
+// without affecting the code so much.\r
+// Current implementation corresponds to balanced-parentheses representation for\r
+// the tree, and storing 2 tags per tree node (opening and closing tags).\r
+\r
+// tag position -> tree node\r
+inline treeNode tagpos2node(int t) {\r
+ return (treeNode)t;\r
+}\r
+\r
+// tree node -> tag position\r
+inline int node2tagpos(treeNode x) {\r
+ return (int)x;\r
+}\r
+\r
+// Save: saves XML tree data structure to file. \r
+void XMLTree::Save(unsigned char *filename) \r
+ {\r
+\r
+ FILE *fp;\r
+ char filenameaux[1024];\r
+ int i;\r
+ \r
+ sprintf(filenameaux, "%s.srx", filename);\r
+ fp = fopen(filenameaux, "w");\r
+ if (fp == NULL) {\r
+ printf("Error: cannot create file %s to store the tree structure of XML collection\n", filenameaux);\r
+ exit(1);\r
+ } \r
+ \r
+ // first stores the tree topology\r
+ saveTree(Par, fp);\r
+ \r
+ // stores the table with tag names\r
+ fwrite(&ntagnames, sizeof(int), 1, fp);\r
+ for (i=0; i<ntagnames;i++)\r
+ fprintf(fp, "%s\n",TagName[i]);\r
+ \r
+ // stores the flags\r
+ fwrite(&indexing_empty_texts, sizeof(bool), 1, fp);\r
+ fwrite(&initialized, sizeof(bool), 1, fp);\r
+ fwrite(&finished, sizeof(bool), 1, fp);\r
+ \r
+ if (!indexing_empty_texts) EBVector->save(fp);\r
+ \r
+ // stores the tags\r
+ Tags->save(fp);\r
+\r
+ // stores the texts \r
+ //Text->Save(fp);\r
+\r
+ fclose(fp);\r
+\r
+ }\r
+\r
+\r
+// Load: loads XML tree data structure from file. Returns\r
+// a pointer to the loaded data structure\r
+XMLTree *XMLTree::Load(unsigned char *filename, int sample_rate_text) \r
+ {\r
+\r
+ FILE *fp;\r
+ char filenameaux[1024];\r
+ XMLTree *XML_Tree;\r
+ int i;\r
+ \r
+ // first load the tree topology\r
+ sprintf(filenameaux, "%s.srx", filename);\r
+ fp = fopen(filenameaux, "r");\r
+ if (fp == NULL) {\r
+ printf("Error: cannot open file %s to load the tree structure of XML collection\n", filenameaux);\r
+ exit(1);\r
+ } \r
+\r
+ XML_Tree = new XMLTree();\r
+\r
+ XML_Tree->Par = (bp *)malloc(sizeof(bp));\r
+\r
+ loadTree(XML_Tree->Par, fp); \r
+ \r
+ // stores the table with tag names\r
+ fread(&XML_Tree->ntagnames, sizeof(int), 1, fp);\r
+\r
+ XML_Tree->TagName = (unsigned char **)malloc(XML_Tree->ntagnames*sizeof(unsigned char *));\r
+\r
+ for (i=0; i<XML_Tree->ntagnames;i++) {\r
+ int k = feof(fp);\r
+ fscanf(fp, "%s\n",filenameaux);\r
+ XML_Tree->TagName[i] = (unsigned char *)malloc(sizeof(unsigned char)*(strlen((const char *)filenameaux)+1));\r
+ strcpy((char *)XML_Tree->TagName[i], (const char *)filenameaux);\r
+ }\r
+ \r
+ // loads the flags\r
+ fread(&(XML_Tree->indexing_empty_texts), sizeof(bool), 1, fp);\r
+ fread(&(XML_Tree->initialized), sizeof(bool), 1, fp);\r
+ fread(&(XML_Tree->finished), sizeof(bool), 1, fp);\r
+ \r
+ if (!(XML_Tree->indexing_empty_texts)) XML_Tree->EBVector = static_bitsequence_rrr02::load(fp);\r
+\r
+ // loads the tags\r
+ XML_Tree->Tags = static_sequence_wvtree::load(fp);\r
+\r
+ // loads the texts \r
+ //XML_Tree->Text->Load(fp,sample_rate_text);\r
+\r
+ fclose(fp);\r
+ \r
+ return XML_Tree;\r
+ }\r
+\r
+\r
+// ~XMLTree: frees memory of XML tree.\r
+XMLTree::~XMLTree() \r
+ { \r
+ int i;\r
+\r
+ destroyTree(Par);\r
+ free(Par); // frees the memory of struct Par\r
+ \r
+ for (i=0; i<ntagnames;i++) \r
+ free(TagName[i]);\r
+ \r
+ free(TagName);\r
+\r
+ if (!indexing_empty_texts) {\r
+ EBVector->~static_bitsequence_rrr02();\r
+ delete EBVector;\r
+ EBVector = NULL;\r
+ }\r
+\r
+ Tags->~static_sequence_wvtree();\r
+ delete Tags;\r
+ Tags = NULL;\r
+\r
+ //Text->~TextCollection();\r
+ // delete Text;\r
+ // Text = NULL;\r
+\r
+ initialized = false;\r
+ finished = false;\r
+ }\r
+\r
+// root(): returns the tree root.\r
+treeNode XMLTree::Root() \r
+ {\r
+ return root_node(Par);\r
+ }\r
+\r
+// SubtreeSize(x): the number of nodes (and attributes) in the subtree of node x.\r
+int XMLTree::SubtreeSize(treeNode x) \r
+ {\r
+ return subtree_size(Par, x);\r
+ }\r
+\r
+// SubtreeTags(x,tag): the number of occurrences of tag within the subtree of node x.\r
+int XMLTree::SubtreeTags(treeNode x, TagType tag) \r
+ {\r
+ int s = x + 2*subtree_size(Par, x) - 1;\r
+ \r
+ return Tags->rank(tag, s) - Tags->rank(tag, node2tagpos(x)-1);\r
+ }\r
+\r
+// IsLeaf(x): returns whether node x is leaf or not. In the succinct representation\r
+// this is just a bit inspection.\r
+bool XMLTree::IsLeaf(treeNode x) \r
+ {\r
+ return isleaf(Par, x);\r
+ } \r
+\r
+// IsAncestor(x,y): returns whether node x is ancestor of node y.\r
+bool XMLTree::IsAncestor(treeNode x, treeNode y) \r
+ {\r
+ return is_ancestor(Par, x, y);\r
+ }\r
+\r
+// IsChild(x,y): returns whether node x is parent of node y.\r
+bool XMLTree::IsChild(treeNode x, treeNode y) \r
+ {\r
+ if (!is_ancestor(Par, x, y)) return false;\r
+ return depth(Par, x) == (depth(Par, y) + 1);\r
+ }\r
+\r
+// NumChildren(x): number of children of node x. Constant time with the data structure\r
+// of Sadakane.\r
+int XMLTree::NumChildren(treeNode x) \r
+ {\r
+ return degree(Par, x);\r
+ }\r
+\r
+// ChildNumber(x): returns i if node x is the i-th children of its parent.\r
+int XMLTree::ChildNumber(treeNode x) \r
+ {\r
+ return child_rank(Par, x);\r
+ }\r
+\r
+// Depth(x): depth of node x, a simple binary rank on the parentheses sequence.\r
+int XMLTree::Depth(treeNode x) \r
+ {\r
+ return depth(Par, x);\r
+ }\r
+\r
+// Preorder(x): returns the preorder number of node x, just counting the tree\r
+// nodes (i.e., tags, it disregards the texts in the tree).\r
+int XMLTree::Preorder(treeNode x) \r
+ {\r
+ return preorder_rank(Par, x);\r
+ }\r
+\r
+// Postorder(x): returns the postorder number of node x, just counting the tree\r
+// nodes (i.e., tags, it disregards the texts in the tree).\r
+int XMLTree::Postorder(treeNode x) \r
+ {\r
+ return postorder_rank(Par, x);\r
+ }\r
+\r
+// Tag(x): returns the tag identifier of node x.\r
+TagType XMLTree::Tag(treeNode x) \r
+ {\r
+ return Tags->access(node2tagpos(x));\r
+ }\r
+\r
+// DocIds(x): returns the range of text identifiers that descend from node x.\r
+// returns {NULLT, NULLT} when there are no texts descending from x.\r
+range XMLTree::DocIds(treeNode x) \r
+ {\r
+ range r;\r
+ if (indexing_empty_texts) { // faster, no rank needed\r
+ r.min = x;\r
+ r.max = x+2*subtree_size(Par, x)-2;\r
+ }\r
+ else { // we are not indexing empty texts, we need rank\r
+ int min = EBVector->rank1(x-1); \r
+ int max = EBVector->rank1(x+2*subtree_size(Par, x)-2); \r
+ if (min==max) { // range is empty, no texts within the subtree of x\r
+ r.min = NULLT;\r
+ r.max = NULLT;\r
+ }\r
+ else { // the range is non-empty, there are texts within the subtree of x\r
+ r.min = min+1;\r
+ r.max = max;\r
+ }\r
+ }\r
+ return r;\r
+ }\r
+\r
+// Parent(x): returns the parent node of node x.\r
+treeNode XMLTree::Parent(treeNode x) \r
+ {\r
+ return parent(Par, x);\r
+ }\r
+\r
+// Child(x,i): returns the i-th child of node x, assuming it exists.\r
+treeNode XMLTree::Child(treeNode x, int i) \r
+ {\r
+ if (i <= OPTD) return naive_child(Par, x, i);\r
+ else return child(Par, x, i);\r
+ }\r
+\r
+// FirstChild(x): returns the first child of node x, assuming it exists. Very fast in BP.\r
+treeNode XMLTree::FirstChild(treeNode x) \r
+ {\r
+ return first_child(Par, x);\r
+ }\r
+\r
+// NextSibling(x): returns the next sibling of node x, assuming it exists.\r
+treeNode XMLTree::NextSibling(treeNode x) \r
+ {\r
+ return next_sibling(Par, x);\r
+ }\r
+\r
+// PrevSibling(x): returns the previous sibling of node x, assuming it exists.\r
+treeNode XMLTree::PrevSibling(treeNode x) \r
+ {\r
+ return prev_sibling(Par, x);\r
+ }\r
+\r
+// TaggedChild(x,i,tag): returns the i-th child of node x tagged tag, or NULLT if there is none.\r
+// Because of the balanced-parentheses representation of the tree, this operation is not supported\r
+// efficiently, just iterating among the children of node x until finding the desired child.\r
+treeNode XMLTree::TaggedChild(treeNode x, int i, TagType tag) \r
+ {\r
+ treeNode child;\r
+ \r
+ child = first_child(Par, x); // starts at first child of node x\r
+ if (child==(treeNode)-1) return NULLT; // node x is a leaf, there is no such child\r
+ while (child!=(treeNode)-1) {\r
+ if (Tags->access(node2tagpos(child)) == tag) { // current child is labeled with tag of interest\r
+ i--;\r
+ if (i==0) return child; // we have seen i children of x tagged tag, this is the one we are looking for\r
+ }\r
+ child = next_sibling(Par, x); // OK, let's try with the next child\r
+ }\r
+ return NULLT; // no such child was found \r
+ }\r
+\r
+// TaggedDesc(x,tag): returns the first node tagged tag with larger preorder than x and within\r
+// the subtree of x. Returns NULLT if there is none.\r
+treeNode XMLTree::TaggedDesc(treeNode x, TagType tag) \r
+ {\r
+ int r, s;\r
+ treeNode y;\r
+ r = (int) Tags->rank(tag, node2tagpos(x));\r
+ s = (int) Tags->select(tag, r+1);\r
+ if (s == -1) return NULLT; // there is no such node\r
+ y = tagpos2node(s); // transforms the tag position into a node position\r
+ if (!is_ancestor(Par, x, y)) return NULLT; // the next node tagged tag (in preorder) is not within the subtree of x.\r
+ else return y;\r
+ }\r
+\r
+// TaggedPrec(x,tag): returns the first node tagged tag with smaller preorder than x and not an\r
+// ancestor of x. Returns NULLT if there is none.\r
+treeNode XMLTree::TaggedPrec(treeNode x, TagType tag) \r
+ {\r
+ int r, s;\r
+ treeNode node_s, root;\r
+ r = (int)Tags->rank(tag, node2tagpos(x)-1);\r
+ if (r==0) return NULLT; // there is no such node.\r
+ s = (int)Tags->select(tag, r);\r
+ root = root_node(Par);\r
+ node_s = tagpos2node(s);\r
+ while (is_ancestor(Par, node_s, x) && (node_s!=root)) { // the one that we found is an ancestor of x\r
+ r--;\r
+ if (r==0) return NULLT; // there is no such node\r
+ s = (int)Tags->select(tag, r); // we should use select_prev instead when provided\r
+ node_s = tagpos2node(s);\r
+ }\r
+ return NULLT; // there is no such node \r
+ }\r
+\r
+// TaggedFoll(x,tag): returns the first node tagged tag with larger preorder than x and not in\r
+// the subtree of x. Returns NULLT if there is none.\r
+treeNode XMLTree::TaggedFoll(treeNode x, TagType tag) \r
+ {\r
+ int r, s;\r
+ r = (int) Tags->rank(tag, node2tagpos(next_sibling(Par, x))-1);\r
+ s = (int) Tags->select(tag, r+1); // select returns -1 in case that there is no r+1-th tag.\r
+ if (s==-1) return NULLT;\r
+ else return tagpos2node(s);\r
+ }\r
+\r
+// PrevText(x): returns the document identifier of the text to the left \r
+// of node x, or NULLT if x is the root node or the text is empty.\r
+// Assumes Doc ids start from 0.\r
+DocID XMLTree::PrevText(treeNode x) \r
+ {\r
+ if (x == Root()) return NULLT;\r
+ if (indexing_empty_texts) // faster, no rank needed\r
+ return (DocID)x-1;\r
+ else { // we are not indexing empty texts, rank is needed\r
+ if (EBVector->access(x-1) == 0) \r
+ return (DocID)NULLT; // there is no text to the left of node (text is empty)\r
+ else\r
+ return (DocID)EBVector->rank1(x-1)-1; //-1 because document ids start from 0\r
+ }\r
+ }\r
+\r
+// NextText(x): returns the document identifier of the text to the right\r
+// of node x, or NULLT if x is the root node. Assumes Doc ids start from 0.\r
+DocID XMLTree::NextText(treeNode x) \r
+ {\r
+ if (x == Root()) return NULLT;\r
+ if (indexing_empty_texts) // faster, no rank needed\r
+ return (DocID)x+2*subtree_size(Par, x)-1;\r
+ else { // we are not indexing empty texts, rank is needed\r
+ int p = x+2*subtree_size(Par, x)-1;\r
+ if (EBVector->access(p) == 0) // there is no text to the right of node\r
+ return (DocID)NULLT;\r
+ else\r
+ return (DocID)EBVector->rank1(p)-1; //-1 because document ids start from 0\r
+ }\r
+ }\r
+\r
+// MyText(x): returns the document identifier of the text below node x, \r
+// or NULLT if x is not a leaf node or the text is empty. Assumes Doc \r
+// ids start from 0.\r
+DocID XMLTree::MyText(treeNode x) \r
+ {\r
+ if (!IsLeaf(x)) return NULLT;\r
+ if (indexing_empty_texts) // faster, no rank needed\r
+ return (DocID)x;\r
+ else { // we are not indexing empty texts, rank is needed\r
+ if (EBVector->access(x) == 0) // there is no text below node x\r
+ return (DocID)NULLT;\r
+ else\r
+ return (DocID)EBVector->rank1(x)-1; //-1 because document ids start from 0\r
+ } \r
+ }\r
+\r
+// TextXMLId(d): returns the preorder of document with identifier d in the tree consisting of\r
+// all tree nodes and all text nodes. Assumes that the tree root has preorder 1.\r
+int XMLTree::TextXMLId(DocID d) \r
+ {\r
+ if (indexing_empty_texts) \r
+ return d + rank_open(Par, d)+1; // +1 because root has preorder 1\r
+ else { // slower, needs rank and select\r
+ int s = EBVector->select1(d+1);\r
+ return rank_open(Par, s) + d + 1; // +1 because root has preorder 1\r
+ }\r
+ }\r
+\r
+// NodeXMLId(x): returns the preorder of node x in the tree consisting \r
+// of all tree nodes and all text nodes. Assumes that the tree root has\r
+// preorder 0;\r
+int XMLTree::NodeXMLId(treeNode x) \r
+ {\r
+ if (indexing_empty_texts)\r
+ return x - 1 + rank_open(Par, x);\r
+ else {\r
+ if (x == Root()) return 1; // root node has preorder 1\r
+ else\r
+ return rank_open(Par, x) + EBVector->rank1(x-1);\r
+ }\r
+ }\r
+\r
+// ParentNode(d): returns the parent node of document identifier d.\r
+treeNode XMLTree::ParentNode(DocID d) \r
+ {\r
+ int s;\r
+ if (indexing_empty_texts) s = d;\r
+ else s = EBVector->select1(d);\r
+ \r
+ if (inspect(Par,s) == CP) // is a closing parenthesis\r
+ return parent(Par, find_open(Par, s));\r
+ else // is an opening parenthesis\r
+ return (treeNode)s;\r
+ \r
+ }\r
+\r
+\r
+// OpenDocument(empty_texts): it starts the construction of the data structure for\r
+// the XML document. Parameter empty_texts indicates whether we index empty texts\r
+// in document or not. Returns a non-zero value upon success, NULLT in case of error.\r
+int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text)\r
+ {\r
+ initialized = true;\r
+ finished = false;\r
+ npar = 0;\r
+ ntagnames = 0;\r
+ \r
+ indexing_empty_texts = empty_texts;\r
+ \r
+ par_aux = (pb *)malloc(sizeof(pb));\r
+ if (!par_aux) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+ setbit(par_aux,npar,OP); // marks a new opening parenthesis for the tree root\r
+ npar++;\r
+ \r
+ tags_aux = (TagType *)malloc(sizeof(TagType));\r
+ if (!tags_aux) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+ \r
+ if (!indexing_empty_texts) {\r
+ empty_texts_aux = (unsigned int *)malloc(sizeof(unsigned int));\r
+ if (!empty_texts_aux) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+ }\r
+ \r
+ //Text = TextCollection::InitTextCollection(sample_rate_text);\r
+ \r
+ return 1; // indicates success in the initialization of the data structure\r
+ }\r
+\r
+// CloseDocument(): it finishes the construction of the data structure for the XML\r
+// document. Tree and tags are represented in the final form, dynamic data \r
+// structures are made static, and the flag "finished" is set to true. After that, \r
+// the data structure can be queried.\r
+int XMLTree::CloseDocument()\r
+ {\r
+ if (!initialized) { // data structure has not been initialized properly\r
+ fprintf(stderr, "Error: data structure has not been initialized properly (by calling method OpenDocument)\n");\r
+ return NULLT;\r
+ }\r
+ \r
+ // closing parenthesis for the tree root\r
+ par_aux = (pb *)realloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));\r
+ if (!par_aux) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT; \r
+ }\r
+ setbit(par_aux,npar,CP); \r
+ npar++;\r
+ \r
+ // creates the data structure for the tree topology\r
+ Par = (bp *)malloc(sizeof(bp)); \r
+ bp_construct(Par, npar, par_aux, OPT_DEGREE|0); \r
+ // creates structure for tags\r
+ alphabet_mapper * am = new alphabet_mapper_none();\r
+ static_bitsequence_builder * bmb = new static_bitsequence_builder_rrr02(32); \r
+ wt_coder * wtc = new wt_coder_huff((uint *)tags_aux,npar-1,am);\r
+ Tags = new static_sequence_wvtree((uint *) tags_aux, (uint) npar-1, wtc, bmb, am);\r
+\r
+ // makes the text collection static\r
+ //Text->MakeStatic();\r
+ \r
+ // creates the data structure marking the non-empty texts (just in the case it is necessary)\r
+ if (!indexing_empty_texts) \r
+ EBVector = new static_bitsequence_rrr02((uint *)empty_texts_aux,(ulong)npar,(uint)32);\r
+\r
+ finished = true;\r
+\r
+ return 1; // indicates success in the inicialization\r
+ }\r
+\r
+\r
+// NewOpenTag(tagname): indicates the event of finding a new opening tag in the document.\r
+// Tag name is given. Returns a non-zero value upon success, and returns NULLT\r
+// in case of failing when trying to insert the new tag.\r
+int XMLTree::NewOpenTag(unsigned char *tagname)\r
+ {\r
+ int i;\r
+\r
+ if (!initialized) { // data structure has not been initialized properly\r
+ fprintf(stderr, "Error: you cannot insert a new opening tag without first calling method OpenDocument first\n");\r
+ return NULLT;\r
+ }\r
+ \r
+ // inserts a new opening parentheses in the bit sequence\r
+ par_aux = (pb *)realloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));\r
+ if (!par_aux) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT; \r
+ }\r
+ setbit(par_aux,npar,OP); // marks a new opening parenthesis\r
+\r
+ // transforms the tagname into a tag identifier. If the tag is new, we insert\r
+ // it in the table.\r
+ for (i=0; i<ntagnames; i++)\r
+ if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break;\r
+ \r
+ if (i==ntagnames) { // the tag is a new one, then we insert it\r
+ TagName = (unsigned char **)realloc(TagName, sizeof(char *)*(ntagnames+1));\r
+ \r
+ if (!TagName) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+ \r
+ ntagnames++;\r
+ TagName[i] = (unsigned char *)malloc(sizeof(unsigned char)*(strlen((const char *)tagname)+1));\r
+ strcpy((char *)TagName[i], (const char *)tagname);\r
+ } \r
+ \r
+ tags_aux = (TagType *)realloc(tags_aux, sizeof(TagType)*(npar + 1));\r
+\r
+ if (!tags_aux) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+\r
+ tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags\r
+ \r
+ npar++;\r
+\r
+ return 1;\r
+ \r
+ }\r
+\r
+\r
+// NewClosingTag(tagname): indicates the event of finding a new closing tag in the document.\r
+// Tag name is given. Returns a non-zero value upon success, and returns NULLT\r
+// in case of failing when trying to insert the new tag.\r
+int XMLTree::NewClosingTag(unsigned char *tagname)\r
+ {\r
+ int i;\r
+\r
+ if (!initialized) { // data structure has not been initialized properly\r
+ fprintf(stderr, "Error: you cannot insert a new closing tag without first calling method OpenDocument first\n");\r
+ return NULLT;\r
+ }\r
+ \r
+ // inserts a new closing parentheses in the bit sequence\r
+ par_aux = (pb *)realloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));\r
+ if (!par_aux) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT; \r
+ }\r
+ setbit(par_aux,npar,CP); // marks a new closing opening parenthesis\r
+\r
+ // transforms the tagname into a tag identifier. If the tag is new, we insert\r
+ // it in the table.\r
+ for (i=0; i<ntagnames; i++)\r
+ if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break;\r
+ \r
+ if (i==ntagnames) { // the tag is a new one, then we insert it\r
+ TagName = (unsigned char **)realloc(TagName, sizeof(char *)*(ntagnames+1));\r
+ \r
+ if (!TagName) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+ \r
+ ntagnames++;\r
+ TagName[i] = (unsigned char *)malloc(sizeof(char)*(strlen((const char *)tagname)+1));\r
+ strcpy((char *)TagName[i], (const char *)tagname);\r
+ } \r
+\r
+ tags_aux = (TagType *)realloc(tags_aux, sizeof(TagType)*(npar + 1));\r
+\r
+ if (!tags_aux) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+\r
+ tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags\r
+ \r
+ npar++;\r
+\r
+ return 1; // success\r
+ \r
+ }\r
+\r
+\r
+// NewText(s): indicates the event of finding a new (non-empty) text s in the document.\r
+// The new text is inserted within the text collection. Returns a non-zero value upon\r
+// success, NULLT in case of error.\r
+int XMLTree::NewText(unsigned char *s)\r
+ {\r
+ if (!initialized) { // data structure has not been initialized properly\r
+ fprintf(stderr, "Error: you cannot insert a new text without first calling method OpenDocument first\n");\r
+ return NULLT;\r
+ }\r
+\r
+ if (!indexing_empty_texts) {\r
+ empty_texts_aux = (unsigned int *)realloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb))));\r
+ if (!empty_texts_aux) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+ \r
+ bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector\r
+ }\r
+ \r
+ //Text->InsertText(s);\r
+ \r
+ return 1; // success\r
+ }\r
+\r
+// NewEmptyText(): indicates the event of finding a new empty text in the document.\r
+// In case of indexing empty and non-empty texts, we insert the empty texts into the\r
+// text collection. In case of indexing only non-empty texts, it just indicates an\r
+// empty text in the bit vector of empty texts. Returns a non-zero value upon\r
+// success, NULLT in case of error.\r
+int XMLTree::NewEmptyText() \r
+ {\r
+ unsigned char c = 0;\r
+ if (!initialized) { // data structure has not been initialized properly\r
+ fprintf(stderr, "Error: you cannot insert a new empty text without first calling method OpenDocument first\n");\r
+ return NULLT;\r
+ }\r
+\r
+ if (!indexing_empty_texts) {\r
+ empty_texts_aux = (unsigned int *)realloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb))));\r
+ if (!empty_texts_aux) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+ \r
+ bitclean(empty_texts_aux, npar-1); // marks the empty text with a 0 in the bit vector\r
+ }\r
+ // else Text->InsertText(&c); // we insert the empty text just in case we index all the texts\r
+ \r
+ return 1; // success \r
+ }\r
+\r
+\r
+// GetTagId: returns the tag identifier corresponding to a given tag name.\r
+// Returns NULLT in case that the tag name does not exists.\r
+TagType XMLTree::GetTagId(unsigned char *tagname)\r
+ {\r
+ int i;\r
+ // this should be changed for more efficient processing\r
+ for (i=0; i<ntagnames; i++)\r
+ if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break; \r
+ if (i==ntagnames) return (TagType)NULLT; // tagname does not exists in the table\r
+ else return i;\r
+ }\r
+\r
+\r
+// GetTagName(tagid): returns the tag name of a given tag identifier.\r
+// Returns NULL in case that the tag identifier is not valid.\r
+unsigned char *XMLTree::GetTagName(TagType tagid)\r
+ {\r
+ unsigned char *s;\r
+\r
+ if (tagid >= ntagnames) return NULL; // invalid tag identifier\r
+ s = (unsigned char *)malloc((strlen((const char *)TagName[tagid])+1)*sizeof(unsigned char));\r
+ strcpy((char *)s, (const char *)TagName[tagid]);\r
+ return s;\r
+ }\r
+\r
--- /dev/null
+\r
+/******************************************************************************\r
+ * Copyright (C) 2008 by Diego Arroyuelo *\r
+ * Interface for the in-memory XQuery/XPath engine *\r
+ * *\r
+ * This program is free software; you can redistribute it and/or modify *\r
+ * it under the terms of the GNU Lesser General Public License as published *\r
+ * by the Free Software Foundation; either version 2 of the License, or *\r
+ * (at your option) any later version. *\r
+ * *\r
+ * This program is distributed in the hope that it will be useful, *\r
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *\r
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *\r
+ * GNU Lesser General Public License for more details. *\r
+ * *\r
+ * You should have received a copy of the GNU Lesser General Public License *\r
+ * along with this program; if not, write to the *\r
+ * Free Software Foundation, Inc., *\r
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *\r
+ ******************************************************************************/ \r
+\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include "bp.h"\r
+#include "libcds/includes/static_bitsequence.h"\r
+#include "libcds/includes/alphabet_mapper.h"\r
+#include "libcds/includes/static_sequence.h"\r
+\r
+//#include "TextCollection/TextCollection.h"\r
+//using SXSI::TextCollection;\r
+\r
+// this constant is used to efficiently compute the child operation in the tree\r
+#define OPTD 10\r
+\r
+#define NULLT -1\r
+\r
+ // sets bit p in e\r
+#define bitset(e,p) ((e)[(p)/W] |= (1<<((p)%W)))\r
+ // cleans bit p in e\r
+#define bitclean(e,p) ((e)[(p)/W] &= ~(1<<((p)%W)))\r
+\r
+\r
+typedef int treeNode;\r
+typedef int TagType; \r
+typedef int DocID; \r
+\r
+typedef struct {\r
+ int min;\r
+ int max;\r
+} range;\r
+\r
+\r
+class XMLTree {\r
+ /** Balanced parentheses representation of the tree */\r
+ bp *Par;\r
+ \r
+ /** Mapping from tag identifer to tag name */ \r
+ unsigned char **TagName;\r
+ \r
+ /** boolean flag indicating whether we are indexing empty texts or not */\r
+ bool indexing_empty_texts; \r
+ \r
+ /** Bit vector indicating with a 1 the positions of the non-empty texts. */\r
+ static_bitsequence_rrr02 *EBVector; \r
+ \r
+ /** Tag sequence represented with a data structure for rank and select */\r
+ static_sequence_wvtree *Tags;\r
+\r
+ /** The texts in the XML document */\r
+ //TextCollection *Text;\r
+ \r
+ /** Flag indicating whether the whole data structure has been constructed or \r
+ * not. If the value is true, you cannot add more texts, nodes, etc. */\r
+ bool finished;\r
+\r
+ /** Flag indicating whether the construction of the data structure has been\r
+ * initialized or not (by calling method OpenDocument()). If this is true,\r
+ * you cannot insert new tags or texts. */\r
+ bool initialized;\r
+ \r
+ /* the following components are used for construction purposes */\r
+ pb *par_aux;\r
+ TagType *tags_aux;\r
+ int npar;\r
+ int ntagnames;\r
+ unsigned int *empty_texts_aux;\r
+ \r
+public:\r
+\r
+ /** Data structure constructor */\r
+ XMLTree() {finished = false; initialized = false;}; \r
+ \r
+ /** Data structure destructor */\r
+ ~XMLTree();\r
+ \r
+ /** root(): returns the tree root. */\r
+ treeNode Root();\r
+ \r
+ /** SubtreeSize(x): the number of nodes (and attributes) in the subtree of \r
+ * node x. */\r
+ int SubtreeSize(treeNode x);\r
+ \r
+ /** SubtreeTags(x,tag): the number of occurrences of tag within the subtree \r
+ * of node x. */\r
+ int SubtreeTags(treeNode x, TagType tag);\r
+ \r
+ /** IsLeaf(x): returns whether node x is leaf or not. In the succinct \r
+ * representation this is just a bit inspection. */\r
+ bool IsLeaf(treeNode x);\r
+ \r
+ /** IsAncestor(x,y): returns whether node x is ancestor of node y. */\r
+ bool IsAncestor(treeNode x, treeNode y);\r
+ \r
+ /** IsChild(x,y): returns whether node x is parent of node y. */\r
+ bool IsChild(treeNode x, treeNode y);\r
+ \r
+ /** NumChildren(x): number of children of node x. Constant time with the \r
+ * data structure of Sadakane. */\r
+ int NumChildren(treeNode x);\r
+ \r
+ /** ChildNumber(x): returns i if node x is the i-th children of its \r
+ * parent. */\r
+ inline int ChildNumber(treeNode x);\r
+\r
+ /** Depth(x): depth of node x, a simple binary rank on the parentheses \r
+ * sequence. */\r
+ int Depth(treeNode x);\r
+ \r
+ /** Preorder(x): returns the preorder number of node x, just regarding tree \r
+ * nodes (and not texts). */ \r
+ int Preorder(treeNode x);\r
+ \r
+ /** Postorder(x): returns the postorder number of node x, just regarding \r
+ * tree nodes (and not texts). */\r
+ int Postorder(treeNode x);\r
+ \r
+ /** Tag(x): returns the tag identifier of node x. */\r
+ TagType Tag(treeNode x);\r
+ \r
+ /** DocIds(x): returns the range (i.e., a pair of integers) of document \r
+ * identifiers that descend from node x. */\r
+ range DocIds(treeNode x);\r
+ \r
+ /** Parent(x): returns the parent node of node x. */\r
+ treeNode Parent(treeNode x);\r
+ \r
+ /** Child(x,i): returns the i-th child of node x, assuming it exists. */ \r
+ treeNode Child(treeNode x, int i);\r
+ \r
+ /** FirstChild(x): returns the first child of node x, assuming it exists. \r
+ * Very fast in BP. */\r
+ treeNode FirstChild(treeNode x);\r
+ \r
+ /** NextSibling(x): returns the next sibling of node x, assuming it \r
+ * exists. */\r
+ treeNode NextSibling(treeNode x);\r
+ \r
+ /** PrevSibling(x): returns the previous sibling of node x, assuming it \r
+ * exists. */\r
+ treeNode PrevSibling(treeNode x);\r
+ \r
+ /** TaggedChild(x,i,tag): returns the i-th child of node x tagged tag, or \r
+ * NULLT if there is none. Because of the balanced-parentheses representation \r
+ * of the tree, this operation is not supported efficiently, just iterating \r
+ * among the children of node x until finding the desired child. */\r
+ treeNode TaggedChild(treeNode x, int i, TagType tag);\r
+ \r
+ /** TaggedDesc(x,tag): returns the first node tagged tag with larger \r
+ * preorder than x and within the subtree of x. Returns NULT if there \r
+ * is none. */\r
+ treeNode TaggedDesc(treeNode x, TagType tag);\r
+\r
+ /** TaggedPrec(x,tag): returns the first node tagged tag with smaller \r
+ * preorder than x and not an ancestor of x. Returns NULLT if there \r
+ * is none. */\r
+ treeNode TaggedPrec(treeNode x, TagType tag);\r
+ \r
+ /** TaggedFoll(x,tag): returns the first node tagged tag with larger \r
+ * preorder than x and not in the subtree of x. Returns NULLT if there \r
+ * is none. */\r
+ treeNode TaggedFoll(treeNode x, TagType tag);\r
+ \r
+ /** PrevText(x): returns the document identifier of the text to the left of \r
+ * node x, or NULLT if x is the root node. */\r
+ DocID PrevText(treeNode x);\r
+ \r
+ /** NextText(x): returns the document identifier of the text to the right of \r
+ * node x, or NULLT if x is the root node. */\r
+ DocID NextText(treeNode x);\r
+ \r
+ /** MyText(x): returns the document identifier of the text below node x, or \r
+ * NULLT if x is not a leaf node. */\r
+ DocID MyText(treeNode x);\r
+ \r
+ /** TextXMLId(d): returns the preorder of document with identifier d in the \r
+ * tree consisting of all tree nodes and all text nodes. */\r
+ int TextXMLId(DocID d);\r
+ \r
+ /** NodeXMLId(x): returns the preorder of node x in the tree consisting of \r
+ * all tree nodes and all text nodes. */\r
+ int NodeXMLId(treeNode x);\r
+ \r
+ /** ParentNode(d): returns the parent node of document identifier d. */\r
+ treeNode ParentNode(DocID d);\r
+\r
+ /** OpenDocument(empty_texts,sample_rate_text): initilizes the construction\r
+ * of the data structure for the XML document. Parameter empty_texts \r
+ * indicates whether we index empty texts in document or not. Parameter \r
+ * sample_rate_text indicates the sampling rate for the text searching data\r
+ * structures (small values get faster searching but a bigger space \r
+ * requirement). Returns a non-zero value upon success, NULLT in case of \r
+ * error. */\r
+ int OpenDocument(bool empty_texts, int sample_rate_text);\r
+\r
+ /** CloseDocument(): finishes the construction of the data structure for \r
+ * the XML document. Tree and tags are represented in the final form, \r
+ * dynamic data structures are made static, and the flag "finished" is set \r
+ * to true. After that, the data structure can be queried. */\r
+ int CloseDocument();\r
+\r
+ /** NewOpenTag(tagname): indicates the event of finding a new opening tag \r
+ * in the document. Tag name is given. Returns a non-zero value upon \r
+ * success, and returns NULLT in case of error. */\r
+ int NewOpenTag(unsigned char *tagname);\r
+ \r
+ /** NewClosingTag(tagname): indicates the event of finding a new closing tag\r
+ * in the document. Tag name is given. Returns a non-zero value upon \r
+ * success, and returns NULLT in case of error. */\r
+ int NewClosingTag(unsigned char *tagname);\r
+ \r
+ /** NewText(s): indicates the event of finding a new (non-empty) text s in \r
+ * the document. The new text is inserted within the text collection. \r
+ * Returns a non-zero value upon success, NULLT in case of error. */\r
+ int NewText(unsigned char *s);\r
+\r
+ /** NewEmptyText(): indicates the event of finding a new empty text in the \r
+ * document. In case of indexing empty and non-empty texts, we insert the \r
+ * empty texts into the text collection. In case of indexing only non-empty\r
+ * texts, it just indicates an empty text in the bit vector of empty texts. \r
+ * Returns a non-zero value upon success, NULLT in case of error. */\r
+ int NewEmptyText();\r
+\r
+ /** GetTagId(tagname): returns the tag identifier corresponding to a given \r
+ * tag name. Returns NULLT in case that the tag name does not exists. */\r
+ TagType GetTagId(unsigned char *tagname);\r
+\r
+ /** GetTagName(tagid): returns the tag name of a given tag identifier. \r
+ * Returns NULL in case that the tag identifier is not valid.*/\r
+ unsigned char *GetTagName(TagType tagid);\r
+\r
+ /** saveXMLTree: saves XML tree data structure to file. Every component of \r
+ * the collection is stored in different files (same name, different file \r
+ * extensions). */\r
+ void Save(unsigned char *filename);\r
+ \r
+ /** load: loads XML tree data structure from file. */\r
+ static XMLTree *Load(unsigned char *filename, int sample_rate_text); \r
+};\r
+\r
+\r
--- /dev/null
+#include "bp.h"\r
+\r
+//#define CHECK\r
+#define RANDOM\r
+\r
+int msize=0;\r
+#define mymalloc(p,n,f) {p =(__typeof__(p)) malloc((n)*sizeof(*p)); msize += (f)*(n)*sizeof(*p); /* if (f) printf("malloc %d bytes at line %d total %d\n",(n)*sizeof(*p),__LINE__,msize); */ if ((p)==NULL) {printf("not enough memory (%d bytes) in line %d\n",msize,__LINE__); exit(1);};}\r
+\r
+int postorder_select_bsearch(bp *b,int s);\r
+\r
+int naive_depth(bp *b, int s)\r
+{\r
+ int i,d;\r
+ if (s < 0) return 0;\r
+ d = 0;\r
+ for (i=0; i<=s; i++) {\r
+ if (getbit(b->B,i)==OP) {\r
+ d++;\r
+ } else {\r
+ d--;\r
+ }\r
+ }\r
+ return d;\r
+}\r
+\r
+void printbp(bp *b, int s, int t)\r
+{\r
+ int i,c,d;\r
+ d = 0;\r
+ for (i=s; i<=t; i++) {\r
+ if (getbit(b->B,i)==OP) {\r
+ c = '(';\r
+ d++;\r
+ } else {\r
+ c = ')';\r
+ d--;\r
+ }\r
+ putchar(c);\r
+ }\r
+}\r
+\r
+int *matchtbl,*parenttbl;\r
+void make_naivetbl(pb *B,int n)\r
+{\r
+ int i,v;\r
+ int *stack,s;\r
+\r
+ mymalloc(matchtbl,n,0);\r
+ mymalloc(parenttbl,n,0);\r
+ mymalloc(stack,n,0);\r
+\r
+ for (i=0; i<n; i++) matchtbl[i] = parenttbl[i] = -1;\r
+\r
+ s = 0;\r
+ v = 0;\r
+ stack[0] = -1;\r
+ for (i=0; i<n; i++) {\r
+ if (getbit(B,i)==OP) {\r
+ v++;\r
+ if (v > 0) {\r
+ parenttbl[i] = stack[v-1];\r
+ stack[v] = i;\r
+ }\r
+ } else {\r
+ if (v > 0) {\r
+ matchtbl[stack[v]] = i; // close\r
+ matchtbl[i] = stack[v]; // open\r
+ }\r
+ v--;\r
+ }\r
+ }\r
+\r
+ free(stack);\r
+}\r
+\r
+int popCount[1<<ETW];\r
+int fwdtbl[(2*ETW+1)*(1<<ETW)];\r
+int bwdtbl[(2*ETW+1)*(1<<ETW)];\r
+#if 0\r
+int mintbl_li[1<<ETW], mintbl_lv[1<<ETW];\r
+int mintbl_ri[1<<ETW], mintbl_rv[1<<ETW];\r
+int maxtbl_li[1<<ETW], maxtbl_lv[1<<ETW];\r
+int maxtbl_ri[1<<ETW], maxtbl_rv[1<<ETW];\r
+#endif\r
+int minmaxtbl_i[4][1<<ETW], minmaxtbl_v[4][1<<ETW];\r
+int degtbl[1<<ETW];\r
+int degtbl2[(2*ETW+1)*(1<<ETW)];\r
+int childtbl[(ETW)*(1<<ETW)];\r
+int childtbl2[2*ETW+1][ETW][(1<<ETW)];\r
+int depthtbl[(2*ETW+1)*(1<<ETW)];\r
+\r
+void make_matchtbl(void)\r
+{\r
+ int i,j,x,r;\r
+ int m,M;\r
+ pb buf[1];\r
+ int deg;\r
+\r
+ for (x = 0; x < (1<<ETW); x++) {\r
+ setbits(buf,0,ETW,x);\r
+ for (r=-ETW; r<=ETW; r++) fwdtbl[((r+ETW)<<ETW)+x] = ETW;\r
+ for (r=-ETW; r<=ETW; r++) bwdtbl[((r+ETW)<<ETW)+x] = ETW;\r
+ for (r=-ETW; r<=ETW; r++) degtbl2[((r+ETW)<<ETW)+x] = 0;\r
+ for (r=-ETW; r<=ETW; r++) depthtbl[((r+ETW)<<ETW)+x] = 0;\r
+\r
+ r = 0;\r
+ for (i=0; i<ETW; i++) {\r
+ if (getbit(buf,i)==OP) {\r
+ r++;\r
+ } else {\r
+ r--;\r
+ }\r
+ if (fwdtbl[((r+ETW)<<ETW)+x] == ETW) fwdtbl[((r+ETW)<<ETW)+x] = i;\r
+ }\r
+\r
+ r = 0;\r
+ for (i=ETW-1; i>=0; i--) {\r
+ if (getbit(buf,i)==OP) {\r
+ r--;\r
+ } else {\r
+ r++;\r
+ }\r
+ if (bwdtbl[((r+ETW)<<ETW)+x] == ETW) bwdtbl[((r+ETW)<<ETW)+x] = ETW-1-i;\r
+ }\r
+\r
+ r = 0;\r
+ for (i=0; i<ETW; i++) {\r
+ if (getbit(buf,i)==OP) {\r
+ r++;\r
+ } else {\r
+ r--;\r
+ }\r
+ depthtbl[((r+ETW)<<ETW)+x] += (1<<(ETW-1));\r
+ }\r
+\r
+ r = 0;\r
+ for (i=0; i<ETW; i++) {\r
+ if (getbit(buf,i)==OP) r++;\r
+ }\r
+ popCount[x] = r;\r
+\r
+ r = 0; m = 0; M = 0;\r
+ m = ETW+1; M = -ETW-1;\r
+ //maxtbl_lv[x] = -ETW-1;\r
+ //mintbl_lv[x] = ETW+1;\r
+ minmaxtbl_v[OPT_MAX | OPT_LEFT][x] = -ETW-1;\r
+ minmaxtbl_v[OPT_MIN | OPT_LEFT][x] = ETW+1;\r
+ deg = 0;\r
+ for (i=0; i<ETW; i++) {\r
+ if (getbit(buf,i)==OP) {\r
+ r++;\r
+ if (r > M) {\r
+ M = r; \r
+ //maxtbl_li[x] = i; maxtbl_lv[x] = r;\r
+ minmaxtbl_i[OPT_MAX | OPT_LEFT][x] = i;\r
+ minmaxtbl_v[OPT_MAX | OPT_LEFT][x] = r;\r
+ }\r
+ } else {\r
+ r--;\r
+ if (r == m) {\r
+ deg++;\r
+ childtbl[((deg-1)<<ETW) + x] = i;\r
+ }\r
+ if (r < m) {\r
+ m = r;\r
+ //mintbl_li[x] = i; mintbl_lv[x] = r;\r
+ minmaxtbl_i[OPT_MIN | OPT_LEFT][x] = i;\r
+ minmaxtbl_v[OPT_MIN | OPT_LEFT][x] = r;\r
+ deg = 1;\r
+ childtbl[((deg-1)<<ETW) + x] = i;\r
+ }\r
+ }\r
+ if (r <= m) degtbl2[((r+ETW)<<ETW)+x]++;\r
+ }\r
+ degtbl[x] = deg;\r
+\r
+ r = 0; m = 0; M = 0;\r
+ //maxtbl_rv[x] = -ETW-1;\r
+ //mintbl_rv[x] = ETW+1;\r
+ minmaxtbl_v[OPT_MAX | OPT_RIGHT][x] = -ETW-1;\r
+ minmaxtbl_v[OPT_MIN | OPT_RIGHT][x] = ETW+1;\r
+ for (i=0; i<ETW; i++) {\r
+ if (getbit(buf,i)==OP) {\r
+ r++;\r
+ if (r >= M) {\r
+ M = r; \r
+ //maxtbl_ri[x] = i; maxtbl_rv[x] = r;\r
+ minmaxtbl_i[OPT_MAX | OPT_RIGHT][x] = i;\r
+ minmaxtbl_v[OPT_MAX | OPT_RIGHT][x] = r;\r
+ }\r
+ } else {\r
+ r--;\r
+ if (r <= m) {\r
+ m = r;\r
+ //mintbl_ri[x] = i; mintbl_rv[x] = r;\r
+ minmaxtbl_i[OPT_MIN | OPT_RIGHT][x] = i;\r
+ minmaxtbl_v[OPT_MIN | OPT_RIGHT][x] = r;\r
+ }\r
+ }\r
+ }\r
+\r
+ for (i = 0; i < ETW; i++) {\r
+ for (j = -ETW; j <= ETW; j++) {\r
+ childtbl2[j+ETW][i][x] = -1;\r
+ }\r
+ }\r
+\r
+ for (j=-ETW; j<=ETW; j++) {\r
+ int ith;\r
+ ith = 0;\r
+ r = 0;\r
+ for (i = 0; i < ETW; i++) {\r
+ if (getbit(buf,i)==OP) {\r
+ r++;\r
+ } else {\r
+ r--;\r
+ if (r < j) break;\r
+ if (r == j) {\r
+ ith++;\r
+ childtbl2[j+ETW][ith-1][x] = i;\r
+ }\r
+ }\r
+ }\r
+ }\r
+ }\r
+\r
+}\r
+\r
+\r
+int bp_construct(bp *b,int n, pb *B, int opt)\r
+{\r
+ int i,j,d;\r
+ int m,M,ds;\r
+ int ns,nm;\r
+ byte *sm, *sM;\r
+ byte *sd;\r
+ int *mm, *mM;\r
+ int *md;\r
+ int m_ofs;\r
+ int r; // # of minimum values\r
+\r
+#if 0\r
+ if (SB % D != 0) {\r
+ printf("warning: SB=%d should be a multiple of D=%d\n",SB,D);\r
+ // not necessarily?\r
+ }\r
+ if (SB % RRR != 0) {\r
+ printf("warning: SB=%d should be a multiple of RRR=%d\n",SB,RRR);\r
+ }\r
+#endif\r
+\r
+ b->B = B;\r
+ b->n = n;\r
+ b->opt = opt;\r
+ b->idx_size = 0;\r
+ b->sm = NULL;\r
+ b->sM = NULL;\r
+ b->sd = NULL;\r
+ b->mm = NULL;\r
+ b->mM = NULL;\r
+ b->md = NULL;\r
+ b->da_leaf = NULL;\r
+ b->da_inorder = NULL;\r
+ b->da_postorder = NULL;\r
+ b->da_dfuds_leaf = NULL;\r
+ mymalloc(b->da,1,0);\r
+ darray_construct(b->da,n,B, opt & OPT_FAST_PREORDER_SELECT);\r
+ b->idx_size += b->da->idx_size;\r
+ printf("preorder rank/select table: %d bytes (%1.2f bpc)\n",b->da->idx_size,(double)b->da->idx_size*8/n);\r
+\r
+ make_matchtbl();\r
+\r
+ ns = (n+SB-1)/SB;\r
+ mymalloc(sm, ns, 0); b->idx_size += ns * sizeof(*sm);\r
+ mymalloc(sM, ns, 0); b->idx_size += ns * sizeof(*sM);\r
+ b->sm = sm;\r
+ b->sM = sM;\r
+ if (opt & OPT_DEGREE) {\r
+ mymalloc(sd, ns, 0); b->idx_size += ns * sizeof(*sd);\r
+ b->sd = sd;\r
+ printf("SB degree table: %d bytes (%1.2f bpc)\n",ns * sizeof(*sd), (double)ns * sizeof(*sd) * 8/n);\r
+ }\r
+ printf("SB table: %d bytes (%1.2f bpc)\n",ns * sizeof(*sm) * 2, (double)ns * sizeof(*sm)*2 * 8/n);\r
+\r
+ for (i=0; i<n; i++) {\r
+ if (i % SB == 0) {\r
+ ds = depth(b,i);\r
+ m = M = ds;\r
+ r = 1;\r
+ } else {\r
+ d = depth(b,i);\r
+ if (d == m) r++;\r
+ if (d < m) {\r
+ m = d;\r
+ r = 1;\r
+ }\r
+ if (d > M) M = d;\r
+ }\r
+ if (i % SB == SB-1 || i==n-1) {\r
+ ds = depth(b,(i/SB)*SB-1);\r
+ if (m - ds + SB < 0 || m - ds + SB > 255) {\r
+ printf("error m=%d ds=%d\n",m,ds);\r
+ }\r
+ if (M - ds + 1 < 0 || M - ds + 1 > 255) {\r
+ printf("error M=%d ds=%d\n",M,ds);\r
+ }\r
+ sm[i/SB] = m - ds + SB;\r
+ sM[i/SB] = M - ds + 1;\r
+ if (opt & OPT_DEGREE) sd[i/SB] = r;\r
+ }\r
+ }\r
+\r
+#if 0\r
+ printf("sd: ");\r
+ for (i=0;i<n/SB;i++) printf("%d ",sd[i]);\r
+ printf("\n");\r
+#endif\r
+\r
+\r
+ nm = (n+MB-1)/MB;\r
+ m_ofs = 1 << blog(nm-1);\r
+ b->m_ofs = m_ofs;\r
+\r
+ mymalloc(mm, nm + m_ofs, 0); b->idx_size += (nm+m_ofs) * sizeof(*mm);\r
+ mymalloc(mM, nm + m_ofs, 0); b->idx_size += (nm+m_ofs) * sizeof(*mM);\r
+ b->mm = mm;\r
+ b->mM = mM;\r
+ if (opt & OPT_DEGREE) {\r
+ mymalloc(md, nm + m_ofs, 0); b->idx_size += (nm+m_ofs) * sizeof(*md);\r
+ b->md = md;\r
+ printf("MB degree table: %d bytes (%1.2f bpc)\n",(nm+m_ofs) * sizeof(*md), (double)(nm+m_ofs) * sizeof(*md) * 8/n);\r
+ }\r
+ printf("MB table: %d bytes (%1.2f bpc)\n",(nm+m_ofs) * sizeof(*mm) * 2, (double)(nm+m_ofs) * sizeof(*mm)*2 * 8/n);\r
+\r
+ for (i=0; i<n; i++) {\r
+ d = depth(b,i);\r
+ if (i % MB == 0) {\r
+ m = M = d;\r
+ r = 1;\r
+ } else {\r
+ if (d == m) r++;\r
+ if (d < m) {\r
+ m = d;\r
+ r = 1;\r
+ }\r
+ if (d > M) M = d;\r
+ }\r
+ if (i % MB == MB-1 || i==n-1) {\r
+ mm[m_ofs+ i/MB] = m;\r
+ mM[m_ofs+ i/MB] = M;\r
+ if (opt & OPT_DEGREE) md[m_ofs+ i/MB] = r;\r
+ }\r
+ }\r
+ \r
+ for (j=m_ofs-1; j > 0; j--) {\r
+ m = 0;\r
+ if (j*2 < nm + m_ofs) m = mm[j*2];\r
+ if (j*2+1 < nm + m_ofs) m = min(m,mm[j*2+1]);\r
+ M = 0;\r
+ if (j*2 < nm + m_ofs) M = mM[j*2];\r
+ if (j*2+1 < nm + m_ofs) M = max(M,mM[j*2+1]);\r
+ mm[j] = m; mM[j] = M;\r
+ if (opt & OPT_DEGREE) {\r
+ d = 0;\r
+ if (j*2 < nm + m_ofs) d = md[j*2];\r
+ if (j*2+1 < nm + m_ofs) {\r
+ if (mm[j*2] == mm[j*2+1]) d += md[j*2+1];\r
+ if (mm[j*2] > mm[j*2+1]) d = md[j*2+1];\r
+ }\r
+ md[j] = d;\r
+ }\r
+ }\r
+ mm[0] = -1;\r
+ mM[0] = mM[1];\r
+ if (opt & OPT_DEGREE) {\r
+ md[0] = -1;\r
+ }\r
+\r
+\r
+#if 0\r
+ printf("md: ");\r
+ for (i=0;i<m_ofs + n/MB;i++) printf("%d ",md[i]);\r
+ printf("\n");\r
+#endif\r
+\r
+ if (opt & OPT_LEAF) {\r
+ mymalloc(b->da_leaf,1,0);\r
+ darray_pat_construct(b->da_leaf, n, B, 2, 0x2, opt & OPT_FAST_LEAF_SELECT);\r
+ printf("leaf rank/select table: %d bytes (%1.2f bpc)\n",b->da_leaf->idx_size,(double)b->da_leaf->idx_size*8/n);\r
+ b->idx_size += b->da_leaf->idx_size; \r
+ } else {\r
+ b->da_leaf = NULL;\r
+ }\r
+\r
+ if (opt & OPT_INORDER) {\r
+ mymalloc(b->da_inorder,1,0);\r
+ darray_pat_construct(b->da_inorder, n, B, 2, 0x1, opt & OPT_FAST_INORDER_SELECT);\r
+ printf("inorder rank/select table: %d bytes (%1.2f bpc)\n",b->da_inorder->idx_size,(double)b->da_inorder->idx_size*8/n);\r
+ b->idx_size += b->da_inorder->idx_size;\r
+ } else {\r
+ b->da_inorder = NULL;\r
+ }\r
+\r
+ if (opt & OPT_FAST_POSTORDER_SELECT) {\r
+ mymalloc(b->da_postorder,1,0);\r
+ darray_pat_construct(b->da_postorder, n, B, 1, 0x0, (opt & OPT_FAST_POSTORDER_SELECT) | OPT_NO_RANK);\r
+ printf("postorder rank/select table: %d bytes (%1.2f bpc)\n",b->da_postorder->idx_size,(double)b->da_postorder->idx_size*8/n);\r
+ b->idx_size += b->da_postorder->idx_size;\r
+ } else {\r
+ b->da_postorder = NULL;\r
+ }\r
+\r
+ if (opt & OPT_DFUDS_LEAF) {\r
+ mymalloc(b->da_dfuds_leaf,1,0);\r
+ darray_pat_construct(b->da_dfuds_leaf, n, B, 2, 0x0, opt & OPT_FAST_DFUDS_LEAF_SELECT);\r
+ printf("dfuds leaf rank/select table: %d bytes (%1.2f bpc)\n",b->da_dfuds_leaf->idx_size,(double)b->da_dfuds_leaf->idx_size*8/n);\r
+ b->idx_size += b->da_dfuds_leaf->idx_size;\r
+ } else {\r
+ b->da_dfuds_leaf = NULL;\r
+ }\r
+\r
+ return 0;\r
+}\r
+\r
+// destroyTree: frees the memory of tree.\r
+void destroyTree(bp *b) {\r
+ if (!b) return; // nothing to free\r
+\r
+ destroyDarray(b->da); // destroys da data structure\r
+ if (b->da) free(b->da); \r
+ \r
+ if (b->sm) free(b->sm);\r
+ if (b->sM) free(b->sM);\r
+ if (b->sd) free(b->sd);\r
+ if (b->mm) free(b->mm);\r
+ if (b->mM) free(b->mM);\r
+ if (b->md) free(b->md);\r
+ \r
+ destroyDarray(b->da_leaf);\r
+ if (b->da_leaf) free(b->da_leaf);\r
+ \r
+ destroyDarray(b->da_inorder);\r
+ if (b->da_inorder) free(b->da_inorder);\r
+ \r
+ destroyDarray(b->da_postorder);\r
+ if (b->da_postorder) free(b->da_postorder);\r
+ \r
+ destroyDarray(b->da_dfuds_leaf);\r
+ if (b->da_dfuds_leaf) free(b->da_dfuds_leaf);\r
+}\r
+\r
+\r
+// saveTree: saves parentheses data structure to file\r
+// By Diego Arroyuelo\r
+void saveTree(bp *b, FILE *fp) {\r
+\r
+ if (fwrite(&(b->n), sizeof(int), 1, fp) != 1) {\r
+ printf("Error: cannot save number of parentheses to file\n");\r
+ exit(1);\r
+ }\r
+\r
+ if (fwrite(b->B, sizeof(pb), (b->n+D-1)/D, fp) != ((b->n+D-1)/D)) {\r
+ printf("Error: cannot save parentheses sequence to file\n");\r
+ exit(1);\r
+ }\r
+\r
+ if (fwrite(&(b->opt), sizeof(int), 1, fp) != 1) {\r
+ printf("Error: cannot save opt in parentheses to file\n");\r
+ exit(1);\r
+ }\r
+}\r
+\r
+// loadTree: load parentheses data structure from file\r
+// By Diego Arroyuelo\r
+void loadTree(bp *b, FILE *fp) {\r
+\r
+ pb *B;\r
+ int n, opt;\r
+\r
+ if (fread(&n, sizeof(int), 1, fp) != 1) {\r
+ printf("Error: cannot read number of parentheses from file\n");\r
+ exit(1);\r
+ }\r
+\r
+ mymalloc(B,(n+D-1)/D,0);\r
+ \r
+ if (fread(B, sizeof(pb), (n+D-1)/D, fp) != ((n+D-1)/D)) {\r
+ printf("Error: cannot read parentheses sequence from file\n");\r
+ exit(1);\r
+ }\r
+\r
+ if (fread(&opt, sizeof(int), 1, fp) != 1) {\r
+ printf("Error: cannot read opt in parentheses from file\n");\r
+ exit(1);\r
+ }\r
+ \r
+ bp_construct(b, n, B, opt); \r
+ \r
+}\r
+\r
+\r
+\r
+int naive_fwd_excess(bp *b,int s, int rel)\r
+{\r
+ int i,v,n;\r
+ pb *B;\r
+ n = b->n; B = b->B;\r
+ v = 0;\r
+ for (i=s+1; i<n; i++) {\r
+ if (getbit(B,i)==OP) {\r
+ v++;\r
+ } else {\r
+ v--;\r
+ }\r
+ if (v == rel) return i;\r
+ }\r
+ return -1;\r
+}\r
+\r
+int naive_bwd_excess(bp *b,int s, int rel)\r
+{\r
+ int i,v;\r
+ pb *B;\r
+ B = b->B;\r
+ v = 0;\r
+ for (i=s; i>=0; i--) {\r
+ if (getbit(B,i)==OP) {\r
+ v--;\r
+ } else {\r
+ v++;\r
+ }\r
+ if (v == rel) return i-1;\r
+ }\r
+ return -2;\r
+}\r
+\r
+int naive_search_SB_l(bp *b, int i, int rel)\r
+{\r
+ int il,v;\r
+\r
+ il = (i / SB) * SB;\r
+ for (; i>=il; i--) {\r
+ if (getbit(b->B,i)==OP) {\r
+ rel++;\r
+ } else {\r
+ rel--;\r
+ }\r
+ if (rel == 0) return i-1;\r
+ }\r
+ if (i < 0) return -2;\r
+ return -3;\r
+}\r
+\r
+int naive_rmq(bp *b, int s, int t,int opt)\r
+{\r
+ int d,i,dm,im;\r
+\r
+ if (opt & OPT_RIGHT) {\r
+ d = dm = depth(b,t); im = t;\r
+ i = t-1;\r
+ while (i >= s) {\r
+ if (getbit(b->B,i+1)==CP) {\r
+ d++;\r
+ if (opt & OPT_MAX) {\r
+ if (d > dm) {\r
+ dm = d; im = i;\r
+ }\r
+ }\r
+ } else {\r
+ d--;\r
+ if (!(opt & OPT_MAX)) {\r
+ if (d < dm) {\r
+ dm = d; im = i;\r
+ }\r
+ }\r
+ }\r
+ i--;\r
+ }\r
+ } else {\r
+ d = dm = depth(b,s); im = s;\r
+ i = s+1;\r
+ while (i <= t) {\r
+ if (getbit(b->B,i)==OP) {\r
+ d++;\r
+ if (opt & OPT_MAX) {\r
+ if (d > dm) {\r
+ dm = d; im = i;\r
+ }\r
+ }\r
+ } else {\r
+ d--;\r
+ if (!(opt & OPT_MAX)) {\r
+ if (d < dm) {\r
+ dm = d; im = i;\r
+ }\r
+ }\r
+ }\r
+ i++;\r
+ }\r
+ }\r
+ return im;\r
+}\r
+\r
+int root_node(bp *b)\r
+{\r
+ return 0;\r
+}\r
+\r
+\r
+int rank_open(bp *b, int s)\r
+{\r
+ return darray_rank(b->da,s);\r
+}\r
+\r
+int rank_close(bp *b, int s)\r
+{\r
+ return s+1 - darray_rank(b->da,s);\r
+}\r
+\r
+int select_open(bp *b, int s)\r
+{\r
+ if (b->opt & OPT_FAST_PREORDER_SELECT) {\r
+ return darray_select(b->da,s,1);\r
+ } else {\r
+ return darray_select_bsearch(b->da,s,getpat_preorder);\r
+ }\r
+}\r
+\r
+int select_close(bp *b, int s)\r
+{\r
+ if (b->opt & OPT_FAST_POSTORDER_SELECT) {\r
+ return darray_pat_select(b->da_postorder,s,getpat_postorder);\r
+ } else {\r
+ return postorder_select_bsearch(b,s);\r
+ }\r
+}\r
+\r
+///////////////////////////////////////////\r
+// find_close(bp *b,int s)\r
+// returns the matching close parenthesis of s\r
+///////////////////////////////////////////\r
+int find_close(bp *b,int s)\r
+{\r
+ return fwd_excess(b,s,-1);\r
+}\r
+\r
+///////////////////////////////////////////\r
+// find_open(bp *b,int s)\r
+// returns the matching open parenthesis of s\r
+///////////////////////////////////////////\r
+int find_open(bp *b,int s)\r
+{\r
+ int r;\r
+ r = bwd_excess(b,s,0);\r
+ if (r >= -1) return r+1;\r
+ return -1;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// parent(bp *b,int s)\r
+// returns the parent of s\r
+// -1 if s is the root\r
+///////////////////////////////////////////\r
+int parent(bp *b,int s)\r
+{\r
+ int r;\r
+ r = bwd_excess(b,s,-2);\r
+ if (r >= -1) return r+1;\r
+ return -1;\r
+}\r
+\r
+int enclose(bp *b,int s)\r
+{\r
+ return parent(b,s);\r
+}\r
+\r
+///////////////////////////////////////////\r
+// level_ancestor(bp *b,int s,int d)\r
+// returns the ancestor of s with relative depth d (d < 0)\r
+// -1 if no such node\r
+///////////////////////////////////////////\r
+int level_ancestor(bp *b,int s,int d)\r
+{\r
+ int r;\r
+ r = bwd_excess(b,s,d-1);\r
+ if (r >= -1) return r+1;\r
+ return -1;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// lca(bp *b, int s, int t)\r
+// returns the lowest common ancestor of s and t\r
+///////////////////////////////////////////\r
+int lca(bp *b, int s, int t)\r
+{\r
+ return parent(b,rmq(b,s,t,0)+1);\r
+}\r
+\r
+\r
+///////////////////////////////////////////\r
+// preorder_rank(bp *b,int s)\r
+// returns the preorder (>= 1) of node s (s >= 0)\r
+///////////////////////////////////////////\r
+int preorder_rank(bp *b,int s)\r
+{\r
+ return darray_rank(b->da,s);\r
+}\r
+\r
+///////////////////////////////////////////\r
+// preorder_select(bp *b,int s)\r
+// returns the node with preorder s (s >= 1)\r
+// -1 if no such node\r
+///////////////////////////////////////////\r
+int preorder_select(bp *b,int s)\r
+{\r
+ // no error handling\r
+ if (b->opt & OPT_FAST_PREORDER_SELECT) {\r
+ return darray_select(b->da,s,1);\r
+ } else {\r
+ return darray_select_bsearch(b->da,s,getpat_preorder);\r
+ }\r
+}\r
+\r
+///////////////////////////////////////////\r
+// postorder_rank(bp *b,int s)\r
+// returns the postorder (>= 1) of node s (s >= 0)\r
+// -1 if s-th bit is not OP\r
+///////////////////////////////////////////\r
+int postorder_rank(bp *b,int s)\r
+{\r
+ int t;\r
+ if (inspect(b,s) == CP) return -1;\r
+ t = find_close(b,s);\r
+ // return t+1 - darray_rank(b->da,t);\r
+ return rank_close(b,t);\r
+}\r
+\r
+int postorder_select_bsearch(bp *b,int s)\r
+{\r
+ int l,r,m;\r
+\r
+ if (s == 0) return -1;\r
+\r
+ if (s > b->da->n - b->da->m) {\r
+ return -1;\r
+ }\r
+ l = 0; r = b->da->n - 1;\r
+\r
+ while (l < r) {\r
+ m = (l+r)/2;\r
+ //printf("m=%d rank=%d s=%d\n",m,m+1 - darray_rank(b->da,m),s);\r
+ if (m+1 - darray_rank(b->da,m) >= s) {\r
+ r = m;\r
+ } else {\r
+ l = m+1;\r
+ }\r
+ }\r
+ return l;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// postorder_select(bp *b,int s)\r
+// returns the position of CP of the node with postorder s (>= 1)\r
+///////////////////////////////////////////\r
+int postorder_select(bp *b,int s)\r
+{\r
+#if 0\r
+ if (b->opt & OPT_FAST_POSTORDER_SELECT) {\r
+ return darray_pat_select(b->da_postorder,s,getpat_postorder);\r
+ } else {\r
+ return postorder_select_bsearch(b->da,s);\r
+ }\r
+#else\r
+ return select_close(b,s);\r
+#endif\r
+}\r
+\r
+///////////////////////////////////////////\r
+// leaf_rank(bp *b,int s)\r
+// returns the number of leaves to the left of s\r
+///////////////////////////////////////////\r
+int leaf_rank(bp *b,int s)\r
+{\r
+ if ((b->opt & OPT_LEAF) == 0) {\r
+ printf("leaf_rank: error!!! not supported\n");\r
+ return -1;\r
+ }\r
+ if (s >= b->n-1) {\r
+ s = b->n-2;\r
+ }\r
+ return darray_pat_rank(b->da_leaf,s,getpat_leaf);\r
+}\r
+\r
+///////////////////////////////////////////\r
+// leaf_select(bp *b,int s)\r
+// returns the position of s-th leaf\r
+///////////////////////////////////////////\r
+int leaf_select(bp *b,int s)\r
+{\r
+ if ((b->opt & OPT_LEAF) == 0) {\r
+ printf("leaf_select: error!!! not supported\n");\r
+ return -1;\r
+ }\r
+ if (s > b->da_leaf->m) return -1;\r
+ if (b->opt & OPT_FAST_LEAF_SELECT) {\r
+ return darray_pat_select(b->da_leaf,s,getpat_leaf);\r
+ } else {\r
+ return darray_select_bsearch(b->da_leaf,s,getpat_leaf);\r
+ }\r
+}\r
+\r
+\r
+///////////////////////////////////////////\r
+// inorder_rank(bp *b,int s)\r
+// returns the number of ")(" (s >= 0)\r
+///////////////////////////////////////////\r
+int inorder_rank(bp *b,int s)\r
+{\r
+ if ((b->opt & OPT_INORDER) == 0) {\r
+ printf("inorder_rank: error!!! not supported\n");\r
+ return -1;\r
+ }\r
+ if (s >= b->n-1) {\r
+ s = b->n-2;\r
+ }\r
+ return darray_pat_rank(b->da_inorder,s,getpat_inorder);\r
+}\r
+\r
+///////////////////////////////////////////\r
+// inorder_select(bp *b,int s)\r
+// returns the s-th position of ")(" (s >= 1)\r
+///////////////////////////////////////////\r
+int inorder_select(bp *b,int s)\r
+{\r
+ if ((b->opt & OPT_INORDER) == 0) {\r
+ printf("inorder_select: error!!! not supported\n");\r
+ return -1;\r
+ }\r
+ if (b->opt & OPT_FAST_INORDER_SELECT) {\r
+ return darray_pat_select(b->da_inorder,s,getpat_inorder);\r
+ } else {\r
+ return darray_select_bsearch(b->da_inorder,s,getpat_inorder);\r
+ }\r
+}\r
+\r
+///////////////////////////////////////////\r
+// leftmost_leaf(bp *b, int s)\r
+///////////////////////////////////////////\r
+int leftmost_leaf(bp *b, int s)\r
+{\r
+ if ((b->opt & OPT_LEAF) == 0) {\r
+ printf("leftmost_leaf: error!!! not supported\n");\r
+ return -1;\r
+ }\r
+ return leaf_select(b,leaf_rank(b,s)+1);\r
+}\r
+\r
+///////////////////////////////////////////\r
+// rightmost_leaf(bp *b, int s)\r
+///////////////////////////////////////////\r
+int rightmost_leaf(bp *b, int s)\r
+{\r
+ int t;\r
+ if ((b->opt & OPT_LEAF) == 0) {\r
+ printf("leftmost_leaf: error!!! not supported\n");\r
+ return -1;\r
+ }\r
+ t = find_close(b,s);\r
+ return leaf_select(b,leaf_rank(b,t));\r
+}\r
+\r
+\r
+\r
+///////////////////////////////////////////\r
+// inspect(bp *b, int s)\r
+// returns OP (==1) or CP (==0) at s-th bit (0 <= s < n)\r
+///////////////////////////////////////////\r
+int inspect(bp *b, int s)\r
+{\r
+ if (s < 0 || s >= b->n) {\r
+ printf("inspect: error s=%d is out of [%d,%d]\n",s,0,b->n-1);\r
+ }\r
+ return getbit(b->B,s);\r
+}\r
+\r
+int isleaf(bp *b, int s)\r
+{\r
+ if (inspect(b,s) != OP) {\r
+ printf("isleaf: error!!! B[%d] = OP\n",s);\r
+ }\r
+ if (inspect(b,s+1) == CP) return 1;\r
+ else return 0;\r
+}\r
+\r
+\r
+///////////////////////////////////////////\r
+// subtree_size(bp *b, int s)\r
+// returns the number of nodes in the subtree of s\r
+///////////////////////////////////////////\r
+int subtree_size(bp *b, int s)\r
+{\r
+ return (find_close(b,s) - s + 1) / 2;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// first_child(bp *b, int s)\r
+// returns the first child\r
+// -1 if s is a leaf\r
+///////////////////////////////////////////\r
+int first_child(bp *b, int s)\r
+{\r
+ if (inspect(b,s+1) == CP) return -1;\r
+ return s+1;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// next_sibling(bp *b,int s)\r
+// returns the next sibling of parent(s)\r
+// -1 if s is the last child\r
+//////////////////////////////////////////\r
+int next_sibling(bp *b, int s)\r
+{\r
+ int t;\r
+ t = find_close(b,s)+1;\r
+ if (t >= b->n) {\r
+ printf("next_sibling: error s=%d t=%d\n",s,t);\r
+ }\r
+ if (inspect(b,t) == CP) return -1;\r
+ return t;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// prev_sibling(bp *b,int s)\r
+// returns the previous sibling of parent(s)\r
+// -1 if s is the first child\r
+//////////////////////////////////////////\r
+int prev_sibling(bp *b, int s)\r
+{\r
+ int t;\r
+ if (s < 0) {\r
+ printf("prev_sibling: error s=%d\n",s);\r
+ }\r
+ if (s == 0) return -1;\r
+ if (inspect(b,s-1) == OP) return -1;\r
+ t = find_open(b,s-1);\r
+ return t;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// deepest_node(bp *b,int s)\r
+// returns the first node with the largest depth in the subtree of s\r
+///////////////////////////////////////////\r
+int deepest_node(bp *b,int s)\r
+{\r
+ int t,m;\r
+ t = find_close(b,s);\r
+ m = rmq(b,s,t, OPT_MAX);\r
+ return m;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// subtree_height(bp *b,int s)\r
+// returns the height of the subtree of s\r
+// 0 if s is a leaf\r
+///////////////////////////////////////////\r
+int subtree_height(bp *b,int s)\r
+{\r
+ int t;\r
+ t = deepest_node(b,s);\r
+ return depth(b,t) - depth(b,s);\r
+}\r
+\r
+int naive_degree(bp *b, int s)\r
+{\r
+ int t,d;\r
+ d = 0;\r
+ t = first_child(b,s);\r
+ while (t >= 0) {\r
+ d++;\r
+ t = next_sibling(b,t);\r
+ }\r
+ return d;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// degree(bp *b, int s)\r
+// returns the number of children of s\r
+// 0 if s is a leaf\r
+///////////////////////////////////////////\r
+int degree(bp *b, int s)\r
+{\r
+ if (b->opt & OPT_DEGREE) {\r
+ return fast_degree(b,s,b->n,0);\r
+ } else {\r
+ return naive_degree(b,s);\r
+ }\r
+}\r
+\r
+int naive_child(bp *b, int s, int d)\r
+{\r
+ int t,i;\r
+ t = first_child(b,s);\r
+ for (i = 1; i < d; i++) {\r
+ if (t == -1) break;\r
+ t = next_sibling(b,t);\r
+ }\r
+ return t;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// child(bp *b, int s, int d)\r
+// returns the d-th child of s (1 <= d <= degree(s))\r
+// -1 if no such node\r
+///////////////////////////////////////////\r
+int child(bp *b, int s, int d)\r
+{\r
+ int r;\r
+ if (b->opt & OPT_DEGREE) {\r
+ //return find_open(b,fast_degree(b,s,b->n,d));\r
+ if (d==1) return first_child(b,s);\r
+ r = fast_degree(b,s,b->n,d-1)+1;\r
+ if (inspect(b,r) == CP) return -1;\r
+ return r;\r
+ } else {\r
+ return naive_child(b,s,d);\r
+ }\r
+ \r
+}\r
+\r
+\r
+int naive_child_rank(bp *b, int t)\r
+{\r
+ int v,d;\r
+ d = 0;\r
+ while (t != -1) {\r
+ d++;\r
+ t = prev_sibling(b,t);\r
+ }\r
+ return d;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// child_rank(bp *b, int t)\r
+// returns d if t is the d-th child of the parent of t (d >= 1)\r
+// 1 if t is the root\r
+///////////////////////////////////////////\r
+int child_rank(bp *b, int t)\r
+{\r
+ int r;\r
+ if (t == root_node(b)) return 1;\r
+ if (b->opt & OPT_DEGREE) {\r
+ r = parent(b,t);\r
+ return fast_degree(b,r,t,0)+1;\r
+ } else {\r
+ return naive_child_rank(b,t);\r
+ }\r
+}\r
+\r
+\r
+\r
+///////////////////////////////////////////\r
+// is_ancestor(bp *b, int s, int t)\r
+// returns 1 if s is an ancestor of t\r
+// 0 otherwise\r
+///////////////////////////////////////////\r
+int is_ancestor(bp *b, int s, int t)\r
+{\r
+ int v;\r
+ v = find_close(b,s);\r
+ if (s <= t && t <= v) return 1;\r
+ return 0;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// distance(bp *b, int s, int t)\r
+// returns the length of the shortest path from s to t in the tree\r
+///////////////////////////////////////////\r
+int distance(bp *b, int s, int t)\r
+{\r
+ int v,d;\r
+ v = lca(b,s,t);\r
+ d = depth(b,v);\r
+ return (depth(b,s) - d) + (depth(b,t) - d);\r
+}\r
+\r
+///////////////////////////////////////////\r
+// level_next(bp *b, int d)\r
+///////////////////////////////////////////\r
+int level_next(bp *b,int s)\r
+{\r
+ int t;\r
+ t = fwd_excess(b,s,0);\r
+ return t;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// level_prev(bp *b, int d)\r
+///////////////////////////////////////////\r
+int level_prev(bp *b,int s)\r
+{\r
+ int t;\r
+ t = bwd_excess(b,s,0);\r
+ return t;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// level_leftmost(bp *b, int d)\r
+///////////////////////////////////////////\r
+int level_leftmost(bp *b, int d)\r
+{\r
+ int t;\r
+ if (d < 1) return -1;\r
+ if (d == 1) return 0;\r
+ t = fwd_excess(b,0,d);\r
+ return t;\r
+}\r
+\r
+///////////////////////////////////////////\r
+// level_rigthmost(bp *b, int d)\r
+///////////////////////////////////////////\r
+int level_rigthmost(bp *b, int d)\r
+{\r
+ int t;\r
+ if (d < 1) return -1;\r
+ if (d == 1) return 0;\r
+ t = bwd_excess(b,0,d-1);\r
+ return find_open(b,t);\r
+}\r
+\r
+///////////////////////////////////////////\r
+// leaf_size(bp *b, int s)\r
+///////////////////////////////////////////\r
+int leaf_size(bp *b, int s)\r
+{\r
+ int t;\r
+ if ((b->opt & OPT_LEAF) == 0) {\r
+ printf("leaf_size: error!!! not supported\n");\r
+ return -1;\r
+ }\r
+ t = find_close(b,s);\r
+ return leaf_rank(b,t) - leaf_rank(b,s);\r
+}\r
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include "darray.h"
+
+#define OP 1
+#define CP 0
+
+#define OPT_MIN 0
+#define OPT_MAX 1
+#define OPT_LEFT 0
+#define OPT_RIGHT 2
+
+#define OPT_LEAF (1<<0)
+#define OPT_INORDER (1<<1)
+#define OPT_DEGREE (1<<2)
+#define OPT_FAST_PREORDER_SELECT (1<<3)
+#define OPT_FAST_LEAF_SELECT (1<<4)
+#define OPT_FAST_INORDER_SELECT (1<<5)
+#define OPT_FAST_POSTORDER_SELECT (1<<6)
+#define OPT_DFUDS_LEAF (1<<7)
+#define OPT_FAST_DFUDS_LEAF_SELECT (1<<8)
+
+//#define logSB 9
+#define logSB 7
+//#define logSB 2
+#define SB (1<<logSB)
+//#define logMB 16
+//#define logMB 12
+#define logMB 15
+//#define logMB 3
+#define MB (1<<logMB)
+
+#define ETW 8 // width of excess lookup table
+#define W1 2 // branching factor
+
+#ifndef min
+ #define min(x,y) ((x)<(y)?(x):(y))
+#endif
+#ifndef max
+ #define max(x,y) ((x)>(y)?(x):(y))
+#endif
+
+
+typedef struct {
+ int n;
+ pb *B;
+ darray *da;
+ byte *sm, *sM;
+ byte *sd;
+ int *mm, *mM;
+ int *md;
+ int m_ofs;
+
+ darray *da_leaf;
+ darray *da_inorder;
+ darray *da_postorder;
+ darray *da_dfuds_leaf;
+ int idx_size;
+ int opt;
+} bp;
+
+int bp_construct(bp *b,int n, pb *B, int opt);
+void printbp(bp *b, int s, int t);
+
+
+int rank_open(bp *b, int s);
+int rank_close(bp *b, int s);
+int select_open(bp *b, int s);
+int select_close(bp *b, int s);
+
+
+int root_node(bp *b);
+int find_close(bp *b,int s);
+int find_open(bp *b,int s);
+int enclose(bp *b,int s);
+int parent(bp *b,int s);
+int level_ancestor(bp *b,int s,int d);
+int depth(bp *b, int s);
+int preorder_rank(bp *b,int s);
+int postorder_rank(bp *b,int s);
+int inspect(bp *b, int s);
+int isleaf(bp *b, int s);
+int rmq(bp *b, int s, int t, int opt);
+int subtree_size(bp *b, int s);
+int first_child(bp *b, int s);
+int next_sibling(bp *b, int s);
+int prev_sibling(bp *b, int s);
+int deepest_node(bp *b,int s);
+int subtree_height(bp *b,int s);
+int is_ancestor(bp *b, int s, int t);
+int distance(bp *b, int s, int t);
+int level_lefthmost(bp *b, int d);
+int level_rigthmost(bp *b, int d);
+int degree(bp *b,int s);
+
+// not efficient
+int naive_degree(bp *b, int s);
+int naive_child(bp *b, int s, int d);
+int naive_child_rank(bp *b, int t);
+int naive_rmq(bp *b, int s, int t,int opt);
+int postorder_select(bp *b,int s);
+
+// using preorder select index
+int preorder_select(bp *b,int s);
+
+// using leaf index
+int leaf_rank(bp *b,int s);
+int leaf_size(bp *b, int s);
+int leftmost_leaf(bp *b, int s);
+int rightmost_leaf(bp *b, int s);
+
+// using leaf select index
+int leaf_select(bp *b,int s);
+
+// using inorder index
+int inorder_rank(bp *b,int s);
+
+// using inorder select index
+int inorder_select(bp *b,int s);
+
+// using degree index
+int fast_degree(bp *b,int s, int t, int ith);
+int child_rank(bp *b, int t);
+int child(bp *b, int s, int d);
+
+
+// new functions for persistence purposes, added by Diego Arroyuelo
+void saveTree(bp *b, FILE *fp);
+void loadTree(bp *b, FILE *fp);
+void destroyTree(bp *b);
+
+int blog(int x);
+pb getpat_preorder(pb *b);
+pb getpat_leaf(pb *b);
+pb getpat_inorder(pb *b);
+pb getpat_postorder(pb *b);
+
+
+int fwd_excess(bp *b,int s, int rel);
+int bwd_excess(bp *b,int s, int rel);
+
+extern int *matchtbl,*parenttbl;
+void make_naivetbl(pb *B,int n);
+
+extern int popCount[1<<ETW];
+extern int fwdtbl[(2*ETW+1)*(1<<ETW)];
+extern int bwdtbl[(2*ETW+1)*(1<<ETW)];
+extern int mintbl_li[1<<ETW], mintbl_lv[1<<ETW];
+extern int mintbl_ri[1<<ETW], mintbl_rv[1<<ETW];
+extern int maxtbl_li[1<<ETW], maxtbl_lv[1<<ETW];
+extern int maxtbl_ri[1<<ETW], maxtbl_rv[1<<ETW];
+
+extern int minmaxtbl_i[4][1<<ETW], minmaxtbl_v[4][1<<ETW];
+extern int degtbl[1<<ETW];
+extern int degtbl2[(2*ETW+1)*(1<<ETW)];
+extern int childtbl[(ETW)*(1<<ETW)];
+extern int depthtbl[(2*ETW+1)*(1<<ETW)];
+extern int childtbl2[2*ETW+1][ETW][(1<<ETW)];
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include "bp.h"
+
+#define NOTFOUND -2
+#define CONTINUE -3
+#define END -4
+#define FOUND -5
+
+#define SBid(i) ((i)>>logSB)
+#define SBfirst(i) ((i) & (~(SB-1)))
+#define SBlast(i) ((i) | (SB-1))
+
+#define MBid(i) ((i)>>logMB)
+#define MBfirst(i) ((i) & (~(MB-1)))
+#define MBlast(i) ((i) | (MB-1))
+
+
+int blog(int x)
+{
+int l;
+ l = 0;
+ while (x>0) {
+ x>>=1;
+ l++;
+ }
+ return l;
+}
+
+
+pb getpat_preorder(pb *b)
+{
+ return *b;
+}
+
+pb getpat_postorder(pb *b)
+{
+ return ~(*b);
+}
+
+pb getpat_leaf(pb *b)
+{
+ pb w1,w2,w;
+ w1 = b[0];
+ w2 = (w1 << 1) + (b[1] >> (D-1));
+ w = w1 & (~w2);
+ return w;
+}
+
+pb getpat_inorder(pb *b)
+{
+ pb w1,w2,w;
+ w1 = b[0];
+ w2 = (w1 << 1) + (b[1] >> (D-1));
+ w = (~w1) & w2;
+ return w;
+}
+
+pb getpat_dfuds_leaf(pb *b)
+{
+ pb w1,w2,w;
+ w1 = b[0];
+ w2 = (w1 << 1) + (b[1] >> (D-1));
+ w = (~w1) & (~w2);
+ return w;
+}
+
+
+
+///////////////////////////////////////////
+// depth(bp *b, int s)
+// returns the depth of s
+// The root node has depth 1
+///////////////////////////////////////////
+int depth(bp *b, int s)
+{
+ int d;
+ if (s < 0) return 0;
+ d = 2 * darray_rank(b->da,s) - (s+1);
+#if 0
+ if (d != naive_depth(b,s)) {
+ d = naive_depth(b,s);
+ darray_rank(b->da,s);
+ }
+ //printf("depth(%d)=%d\n",s,d);
+#endif
+ return d;
+}
+
+int fast_depth(bp *b, int s)
+{
+ int d;
+ darray *da;
+ int r,j;
+
+ s++;
+ if ((s & (RRR-1)) != 0) {
+ //printf("fast_depth:warning s=%d\n",s);
+ return depth(b,s);
+ }
+ da = b->da;
+ //d = 2 * (da->rl[s>>logR] + da->rm[s>>logRR] + da->rs[s>>logRRR]) - s;
+
+ r = da->rl[s>>logR] + da->rm[s>>logRR];
+ j = (s>>logRRR) & (RR/RRR-1);
+ while (j > 0) {
+ r += da->rs[((s>>logRR)<<(logRR-logRRR))+j-1];
+ j--;
+ }
+ d = 2 * r - s;
+
+ return d;
+}
+
+int search_SB_r(bp *b, int i, int rel)
+{
+ int j,r,n,il;
+ pb *p,x,w;
+
+ n = b->n;
+ il = min((SBid(i) + 1) << logSB,n);
+ p = &b->B[i>>logD];
+ while (i<il) {
+ x = *p++;
+ j = i & (D-1);
+ x <<= j;
+ j = D-j;
+ while (j>0) {
+ w = (x >> (D-ETW)) & ((1<<ETW)-1);
+ if (rel >= -ETW && rel <= ETW) {
+ r = fwdtbl[((rel+ETW)<<ETW)+w];
+ if (r<ETW && r<j) {
+ if (i+r >= n) return NOTFOUND;
+ return i+r;
+ }
+ }
+ r = min(j,ETW);
+ rel -= 2*popCount[w]-r;
+ x <<= r;
+ i += r;
+ j -= r;
+ }
+ }
+ return CONTINUE;
+}
+
+int search_MB_r(bp *b, int i, int td)
+{
+ int il,d;
+ int m,M,n;
+ pb *B;
+
+ B = b->B;
+ n = b->n;
+
+ il = min((MBid(i) + 1) << logMB,n);
+ for ( ; i < il; i+=SB) {
+#if (SB % RRR != 0)
+ d = depth(b,i-1);
+#else
+ d = fast_depth(b,i-1);
+#endif
+ m = d + b->sm[SBid(i)] - SB;
+ M = d + b->sM[SBid(i)] - 1;
+ if (m <= td && td <= M) {
+ return search_SB_r(b,i,td-d);
+ }
+ }
+ if (i >= n) return NOTFOUND;
+ return CONTINUE;
+}
+
+///////////////////////////////////////////
+// fwd_excess(bp *b,int s, int rel)
+// find the leftmost value depth(s)+rel to the right of s (exclusive)
+///////////////////////////////////////////
+int fwd_excess(bp *b,int s, int rel)
+{
+ int i,n;
+ int d,td;
+ int m,M;
+ int m_ofs;
+ pb *B;
+ n = b->n; B = b->B;
+
+ i = s+1;
+#if 1
+ d = search_SB_r(b,i,rel);
+ if (d >= NOTFOUND) return d;
+
+ i = min((SBid(i) + 1) << logSB,n);
+ td = depth(b,s) + rel;
+ d = search_MB_r(b,i,td);
+ if (d >= NOTFOUND) return d;
+#else
+ if (i != SBfirst(i)) {
+ d = search_SB_r(b,i,rel);
+ if (d >= NOTFOUND) return d;
+ }
+
+ td = depth(b,s) + rel;
+
+ i = SBid(i+SB-1) << logSB;
+
+ if (i != MBfirst(i)) {
+ d = search_MB_r(b,i,td);
+ if (d >= NOTFOUND) return d;
+ }
+#endif
+
+ m_ofs = b->m_ofs;
+ i = MBid(s) + m_ofs;
+ while (i > 0) {
+ if ((i&1) == 0) {
+ i++;
+ m = b->mm[i];
+ M = b->mM[i];
+ if (m <= td && td <= M) break;
+ }
+ i >>= 1;
+ }
+ if (i == 0) return NOTFOUND;
+
+ while (i < m_ofs) {
+ i <<= 1;
+ m = b->mm[i];
+ M = b->mM[i];
+ if (!(m <= td && td <= M)) i++;
+ }
+ i -= m_ofs;
+ i <<= logMB;
+
+ d = search_MB_r(b,i,td);
+ if (d >= NOTFOUND) return d;
+
+ // unexpected (bug)
+ printf("fwd_excess: ???\n");
+ return -99;
+
+}
+
+
+int degree_SB_slow(bp *b, int i, int t, int rel, int *ans, int ith)
+{
+ int j,r,n,il;
+ pb *p,x,w,w2;
+ int d, deg, v;
+
+ n = t;
+ il = min((SBid(i) + 1) << logSB,n);
+ d = deg = 0;
+
+ while (i < il) {
+ if (getbit(b->B,i)==OP) {
+ d++;
+ } else {
+ d--;
+ }
+ if (d < rel) { // reached the end
+ if (ith > 0) {
+ return NOTFOUND;
+ } else {
+ *ans = deg;
+ return END;
+ }
+ }
+ if (d == rel) { // found the same depth
+ deg++;
+ if (deg == ith) {
+ *ans = i;
+ return FOUND;
+ }
+ }
+ i++;
+ }
+ *ans = deg;
+ return CONTINUE;
+}
+
+int degree_SB(bp *b, int i, int t, int rel, int *ans, int ith)
+{
+ int j,r,n,il;
+ pb *p,x,w,w2;
+ int d, deg, v;
+ int degtmp,itmp;
+ int ith2,d2;
+
+ n = t;
+ il = min((SBid(i) + 1) << logSB,n);
+ d = deg = 0;
+
+ p = &b->B[i>>logD];
+ while (i < il) {
+ x = *p++;
+ j = i & (D-1);
+ x <<= j;
+ j = min(D-j,il-i);
+ while (j>0) {
+ w = (x >> (D-ETW)) & ((1<<ETW)-1);
+ w2 = 0;
+ if (j < ETW || il-i < ETW) {
+ r = max(ETW-j,ETW-(il-i));
+ w2 = (1<<r)-1;
+ }
+ v = minmaxtbl_v[0][w | w2];
+ if (d + v < rel) {
+ if (ith > 0) {
+#if 0
+ for (r = 0; r < ETW; r++) {
+ if (w & 0x80) {
+ d++;
+ } else {
+ d--;
+ if (d < rel) break;
+ if (d == rel) {
+ ith--;
+ if (ith == 0) {
+ *ans = i + r;
+ return FOUND;
+ }
+ }
+ }
+ w <<= 1;
+ }
+ return NOTFOUND;
+#else
+ r = childtbl2[rel-d+ETW][ith-1][w];
+ if (r >= 0) {
+ *ans = i + r;
+ return FOUND;
+ }
+ return NOTFOUND;
+#endif
+ }
+ r = ETW-1-minmaxtbl_i[0][w | w2];
+ w2 = (1<<r)-1;
+ deg += degtbl2[((rel-d+ETW)<<ETW) + (w & (~w2))];
+ *ans = deg;
+ return END;
+ }
+ if (d + v == rel) {
+ r = degtbl[w | w2];
+ deg += r;
+ if (ith > 0) {
+ if (ith <= r) {
+ *ans = i + childtbl[((ith-1)<<ETW) + (w | w2)];
+ return FOUND;
+ }
+ ith -= r;
+ }
+ }
+
+ r = min(j,ETW);
+ d += 2*popCount[w]-r;
+ x <<= r;
+ i += r;
+ j -= r;
+ }
+ }
+
+ *ans = deg;
+ return CONTINUE;
+}
+
+int degree_MB(bp *b, int i, int t, int td, int *ans,int ith)
+{
+ int il,d;
+ int m,M,n,r;
+ pb *B;
+ int deg,degtmp;
+
+ d = 0;
+ B = b->B;
+ n = t;
+
+ il = min((MBid(i) + 1) << logMB,n);
+ deg = 0;
+ for ( ; i+SB-1 < il; i+=SB) {
+#if (SB % RRR != 0)
+ d = depth(b,i-1);
+#else
+ d = fast_depth(b,i-1);
+#endif
+ m = d + b->sm[SBid(i)] - SB;
+ if (m < td) {
+ r = degree_SB(b,i,n,td-d,°tmp,ith);
+ if (ith > 0) {
+ if (r == NOTFOUND) return NOTFOUND;
+ *ans = degtmp;
+ return FOUND;
+ } else {
+ *ans = deg + degtmp;
+ return END;
+ }
+ }
+ if (m == td) {
+ if (ith > 0) {
+ if (ith <= b->sd[SBid(i)]) break;
+ ith -= b->sd[SBid(i)];
+ }
+ deg += b->sd[SBid(i)];
+ }
+ }
+ if (i < il) {
+#if (SB % RRR != 0)
+ d = depth(b,i-1);
+#else
+ d = fast_depth(b,i-1);
+#endif
+ r = degree_SB(b,i,n,td-d,°tmp,ith);
+ if (ith > 0) {
+ if (r == NOTFOUND) return NOTFOUND;
+ if (r == FOUND) {
+ *ans = degtmp;
+ return FOUND;
+ }
+ } else {
+ deg += degtmp;
+ }
+ }
+ *ans = deg;
+ if (i >= n) return END;
+ return CONTINUE;
+}
+
+static int partition_range(int s,int t)
+{
+ int i,j,h;
+
+ printf("partition [%d,%d] => ",s,t);
+ h = 1;
+ while (s <= t) {
+ if (s & h) {
+ if (s+h-1 <= t) {
+ printf("[%d,%d] ",s,s+h-1);
+ s += h;
+ }
+ } else {
+ if (s+h > t) break;
+ }
+ h <<= 1;
+ }
+ while (h > 0) {
+ if (s+h-1 <= t) {
+ printf("[%d,%d] ",s,s+h-1);
+ s += h;
+ }
+ h >>= 1;
+ }
+ printf("\n");
+}
+
+
+
+
+///////////////////////////////////////////
+// fast_degree(bp *b,int s, int t, int ith)
+// returns the number of children of s, to the left of t
+// returns the position of (ith)-th child of s if (ith > 0)
+///////////////////////////////////////////
+int fast_degree(bp *b,int s, int t, int ith)
+{
+ int i,j,n;
+ int d,td;
+ int m,M;
+ int m_ofs;
+ pb *B;
+ int deg,degtmp;
+ int sm,tm,ss,h;
+
+ n = t;
+ B = b->B;
+
+ deg = 0;
+
+ i = s+1;
+ if (i != SBfirst(i)) {
+ d = degree_SB(b,i,n,0,°tmp,ith);
+ if (ith > 0) {
+ if (d == NOTFOUND) return -1;
+ if (d == FOUND) return degtmp;
+ ith -= degtmp;
+ }
+ if (d == END) return degtmp;
+ deg += degtmp;
+ }
+
+ td = depth(b,s);
+
+ i = SBid(i+SB-1) << logSB;
+
+ if (i != MBfirst(i)) {
+ d = degree_MB(b,i,n,td,°tmp,ith);
+ if (ith > 0) {
+ if (d == NOTFOUND) return -1;
+ if (d == FOUND) return degtmp;
+ ith -= degtmp;
+ deg += degtmp;
+ } else {
+ deg += degtmp;
+ if (d == END) {
+ return deg;
+ }
+ }
+ }
+
+#if 0
+ // sequential search
+
+ sm = MBid(i+MB-1);
+ tm = MBid((n-1)+1)-1; // the rightmost MB fully contained in [0,n-1]
+
+ m_ofs = b->m_ofs;
+ sm += m_ofs; tm += m_ofs;
+ for (i=sm; i<=tm; i++) {
+ if (b->mm[i] < td) {
+ break;
+ }
+ if (b->mm[i] == td) {
+ if (ith > 0) {
+ if (ith <= b->md[i]) break;
+ ith -= b->md[i];
+ }
+ deg += b->md[i];
+ }
+ }
+ ss = i - m_ofs;
+#else
+ sm = MBid(i+MB-1);
+ tm = MBid((n-1)+1)-1; // the rightmost MB fully contained in [0,n-1]
+
+ m_ofs = b->m_ofs;
+ sm += m_ofs; tm += m_ofs;
+ ss = sm;
+
+ //partition_range(sm,tm);
+
+ //printf("partition [%d,%d] => ",sm,tm);
+ h = 1;
+ while (sm <= tm) {
+ if (sm & h) {
+ if (sm+h-1 <= tm) {
+ //printf("[%d,%d] ",sm,sm+h-1);
+ j = sm / h;
+ if (b->mm[j] < td) {
+ h >>= 1;
+ break;
+ }
+ if (b->mm[j] == td) {
+ if (ith > 0) {
+ if (ith <= b->md[j]) {
+ h >>= 1;
+ break;
+ }
+ ith -= b->md[j];
+ }
+ deg += b->md[j];
+ }
+ sm += h;
+ }
+ } else {
+ if (sm+h > tm) break;
+ }
+ h <<= 1;
+ }
+ while (h > 0) {
+ if (sm+h-1 <= tm) {
+ //printf("[%d,%d] ",sm,sm+h-1);
+ j = sm / h;
+ if (ith > 0) {
+ if (b->mm[j] >= td) {
+ if (b->mm[j] == td) {
+ if (ith > b->md[j]) {
+ ith -= b->md[j];
+ sm += h;
+ } else {
+ deg += b->md[j];
+ }
+ } else {
+ sm += h;
+ }
+ }
+ } else {
+ if (b->mm[j] >= td) {
+ if (b->mm[j] == td) {
+ deg += b->md[j];
+ }
+ sm += h;
+ }
+ }
+ }
+ h >>= 1;
+ }
+ //printf("\n");
+ ss = sm;
+
+ ss -= m_ofs;
+
+#endif
+
+ ss <<= logMB;
+
+ d = degree_MB(b,ss,n,td,°tmp,ith);
+ if (ith > 0) {
+ if (d == NOTFOUND) return -1;
+ if (d == FOUND) return degtmp;
+ }
+ deg += degtmp;
+ if (d == END) return deg;
+ return deg;
+
+ // unexpected (bug)
+ printf("degree: ???\n");
+ return -99;
+
+}
+
+int search_SB_l(bp *b, int i, int rel)
+{
+ int j,r,il;
+ pb *p,x,w;
+
+ il = SBfirst(i);
+
+ p = &b->B[i>>logD];
+ while (i>=il) {
+ x = *p--;
+ j = (i & (D-1))+1;
+ x >>= D-j;
+ while (j>0) {
+ w = x & ((1<<ETW)-1);
+ if (rel >= -ETW && rel <= ETW) {
+ r = bwdtbl[((rel+ETW)<<ETW)+w];
+ if (r<ETW && r<j) {
+ if (i-r < 0) return NOTFOUND;
+ return i-r-1;
+ }
+ }
+ r = min(j,ETW);
+ rel += 2*popCount[w]-r;
+ x >>= r;
+ i -= r;
+ j -= r;
+ }
+
+ }
+ if (i < 0) return NOTFOUND;
+ return CONTINUE;
+}
+
+int search_MB_l(bp *b, int i, int td)
+{
+ int il,d;
+ int m,M;
+ pb *B;
+
+#if 0
+ if (i % SB != SB-1) {
+ printf("search_MB_l:error!!! i=%d SB=%d\n",i,i%SB);
+ }
+#endif
+ B = b->B;
+
+ il = MBfirst(i);
+ for ( ; i >= il; i-=SB) {
+#if (SB % RRR != 0)
+ d = depth(b,i-SB);
+#else
+ d = fast_depth(b,i-SB);
+#endif
+ m = d + b->sm[SBid(i)] - SB;
+ M = d + b->sM[SBid(i)] - 1;
+ if (m <= td && td <= M) {
+#if (SB % RRR != 0)
+ d = depth(b,i);
+#else
+ d = fast_depth(b,i);
+#endif
+ if (d == td) return i;
+ return search_SB_l(b,i,td-d);
+ }
+ }
+ return CONTINUE;
+}
+
+///////////////////////////////////////////
+// bwd_excess(bp *b,int s, int rel)
+// find the rightmost value depth(s)+rel to the left of s (exclusive)
+///////////////////////////////////////////
+int bwd_excess(bp *b,int s, int rel)
+{
+ int i,n;
+ int d,td;
+ int m,M;
+ int m_ofs;
+ pb *B;
+ n = b->n; B = b->B;
+
+ i = s;
+ d = search_SB_l(b,i,rel);
+ if (d >= NOTFOUND) return d;
+
+ i = SBfirst(i) -1;
+
+ td = depth(b,s) + rel;
+
+ d = search_MB_l(b,i,td);
+ if (d >= NOTFOUND) return d;
+
+ m_ofs = b->m_ofs;
+ i = (s>>logMB) + m_ofs;
+ while (i > 0) {
+ if ((i&1) == 1) {
+ i--;
+ m = b->mm[i];
+ M = b->mM[i];
+ if (m <= td && td <= M) break;
+ }
+ i >>= 1;
+ }
+ if (i == 0) {
+ if (td == 0) return -1;
+ else return NOTFOUND;
+ }
+
+ while (i < m_ofs) {
+ i = i*2 + 1;
+ m = b->mm[i];
+ M = b->mM[i];
+ if (!(m <= td && td <= M)) i--;
+ }
+ i -= m_ofs;
+ i = ((i+1)<<logMB)-1;
+
+ d = search_MB_l(b,i,td);
+ if (d >= NOTFOUND) return d;
+
+ // unexpected (bug)
+ printf("bwd_excess: ???\n");
+ return -99;
+
+}
+
+int rmq_SB(bp *b, int s, int t, int opt, int *dm)
+{
+ int i,d;
+ int is,ds;
+ pb *p,x,w,w2;
+ int j,v,r;
+ int lr;
+ int op;
+
+ lr = 0; if (opt & OPT_RIGHT) lr = 1;
+ op = opt & (OPT_RIGHT | OPT_MAX);
+
+ is = s; ds = d = 0;
+ i = s+1;
+
+#if SB >= ETW
+ p = &b->B[i>>logD];
+ while (i <= t) {
+ x = *p++;
+ j = i & (D-1);
+ x <<= j;
+ j = min(D-j,t-i+1);
+ while (j>0) {
+ w = (x >> (D-ETW)) & ((1<<ETW)-1);
+ w2 = 0;
+ if (j < ETW || t-i < ETW-1) {
+ r = max(ETW-j,ETW-1-(t-i));
+ w2 = (1<<r)-1;
+ }
+
+ if (op & OPT_MAX) {
+ v = minmaxtbl_v[op][w & (~w2)];
+ if (d + v + lr > ds) {
+ ds = d + v;
+ is = i + minmaxtbl_i[op][w & (~w2)];
+ }
+ } else {
+ v = minmaxtbl_v[op][w | w2];
+ if (d + v < ds +lr) {
+ ds = d + v;
+ is = i + minmaxtbl_i[op][w | w2];
+ }
+ }
+
+ r = min(j,ETW);
+ d += 2*popCount[w]-r;
+ x <<= r;
+ i += r;
+ j -= r;
+ }
+ }
+#else
+ while (i <= t) {
+ if (getbit(b->B,i)==OP) {
+ d++;
+ if (op & OPT_MAX) {
+ if (d + lr > ds) {
+ ds = d; is = i;
+ }
+ }
+ } else {
+ d--;
+ if (!(op & OPT_MAX)) {
+ if (d < ds + lr) {
+ ds = d; is = i;
+ }
+ }
+ }
+ i++;
+ }
+#endif
+ *dm = ds;
+ return is;
+}
+
+int rmq_MB(bp *b, int s, int t, int opt, int *dm)
+{
+ int i,d,m;
+ int mi,md;
+ int lr;
+
+ lr = 0; if (opt & OPT_RIGHT) lr = 1;
+
+ md = *dm; mi = -1;
+ for (i = s; i <= t; i++) {
+#if (SB % RRR != 0)
+ d = depth(b,(i<<logSB)-1);
+#else
+ d = fast_depth(b,(i<<logSB)-1);
+#endif
+ if (opt & OPT_MAX) {
+ m = d + b->sM[i] - 1;
+ if (m + lr > md) {
+ md = m; mi = i;
+ }
+ } else {
+ m = d + b->sm[i] - SB;
+ if (m < md + lr) {
+ md = m; mi = i;
+ }
+ }
+ }
+ *dm = md;
+ return mi;
+}
+
+
+
+
+///////////////////////////////////////////
+// rmq(bp *b, int s, int t, int opt)
+// returns the index of leftmost/rightmost minimum/maximum value
+// in the range [s,t] (inclusive)
+// returns the leftmost minimum if opt == 0
+// the rightmost one if (opt & OPT_RIGHT)
+// the maximum if (opt & OPT_MAX)
+///////////////////////////////////////////
+int rmq(bp *b, int s, int t, int opt)
+{
+ int ss, ts; // SB index of s and t
+ int sm, tm; // MB index of s and t
+ int ds; // current min value
+ int is; // current min index
+ int ys; // type of current min index
+ // 0: is is the index of min
+ // 1: is is the SB index
+ // 2: is is the MB index
+ int m_ofs;
+ int i,j,il,d,n;
+ int dm;
+ int lr;
+
+ lr = 0; if (opt & OPT_RIGHT) lr = 1;
+
+ n = b->n;
+ if (s < 0 || t >= n || s > t) {
+ printf("rmq: error s=%d t=%d n=%d\n",s,t,n);
+ return -1;
+ }
+ if (s == t) return s;
+
+
+ ////////////////////////////////////////////////////////////
+ // search the SB of s
+ ////////////////////////////////////////////////////////////
+
+ il = min(SBlast(s),t);
+ is = rmq_SB(b,s,il,opt,&dm);
+ if (il == t) { // scan reached the end of the range
+ return is;
+ }
+ ds = depth(b,s) + dm; ys = 0;
+
+ ////////////////////////////////////////////////////////////
+ // search the MB of s
+ ////////////////////////////////////////////////////////////
+
+ ss = SBid(s) + 1;
+ il = min(SBid(MBlast(s)),SBid(t)-1);
+ dm = ds;
+ j = rmq_MB(b,ss,il,opt,&dm);
+ //if (dm < ds + lr) {
+ if (j >= 0) {
+ ds = dm; is = j; ys = 1;
+ }
+
+ ////////////////////////////////////////////////////////////
+ // search the tree
+ ////////////////////////////////////////////////////////////
+
+ sm = MBid(s) + 1;
+ tm = MBid(t) - 1;
+
+#if 0
+ // sequential search
+ m_ofs = b->m_ofs;
+ sm += m_ofs; tm += m_ofs;
+ for (i=sm; i<=tm; i++) {
+ if (opt & OPT_MAX) {
+ if (b->mM[i] + lr > ds) {
+ ds = b->mM[i]; is = i - m_ofs; ys = 2;
+ }
+ } else {
+ if (b->mm[i] < ds + lr) {
+ ds = b->mm[i]; is = i - m_ofs; ys = 2;
+ }
+ }
+ }
+
+#else
+ if (sm <= tm) {
+ int h;
+ h = blog(sm ^ tm);
+
+ m_ofs = b->m_ofs;
+ sm += m_ofs; tm += m_ofs;
+
+ if (opt & OPT_MAX) {
+ if (b->mM[sm] + lr > ds) {
+ ds = b->mM[sm]; is = sm; ys = 2;
+ }
+ for (i=0; i<=h-2; i++) {
+ j = sm>>i;
+ if ((j&1) == 0) {
+ if (b->mM[j+1] + lr > ds) {
+ ds = b->mM[j+1]; is = j+1; ys = 2;
+ }
+ }
+ }
+ for (i=h-2; i>=0; i--) {
+ j = tm>>i;
+ if ((j&1)==1) {
+ if (b->mM[j-1] + lr > ds) {
+ ds = b->mM[j-1]; is = j-1; ys = 2;
+ }
+ }
+ }
+ if (b->mM[tm] + lr > ds) {
+ ds = b->mM[tm]; is = tm; ys = 2;
+ }
+ if (ys == 2) {
+ while (is < m_ofs) {
+ is <<= 1;
+ if (b->mM[is+1] + lr > b->mM[is]) is++;
+ }
+ is -= m_ofs;
+ }
+ } else { // MIN
+ if (b->mm[sm] < ds + lr) {
+ ds = b->mm[sm]; is = sm; ys = 2;
+ }
+ for (i=0; i<=h-2; i++) {
+ j = sm>>i;
+ if ((j&1) == 0) {
+ if (b->mm[j+1] < ds + lr) {
+ ds = b->mm[j+1]; is = j+1; ys = 2;
+ }
+ }
+ }
+ for (i=h-2; i>=0; i--) {
+ j = tm>>i;
+ if ((j&1)==1) {
+ if (b->mm[j-1] < ds + lr) {
+ ds = b->mm[j-1]; is = j-1; ys = 2;
+ }
+ }
+ }
+ if (b->mm[tm] < ds + lr) {
+ ds = b->mm[tm]; is = tm; ys = 2;
+ }
+ if (ys == 2) {
+ while (is < m_ofs) {
+ is <<= 1;
+ if (b->mm[is+1] < b->mm[is] + lr) is++;
+ }
+ is -= m_ofs;
+ }
+ }
+ }
+
+#endif
+
+ ////////////////////////////////////////////////////////////
+ // search the MB of t
+ ////////////////////////////////////////////////////////////
+
+
+ ts = max(SBid(MBfirst(t)),SBid(s)+1);
+ il = SBid(SBfirst(t)-1);
+ dm = ds;
+ j = rmq_MB(b,ts,il,opt,&dm);
+ //if (dm < ds + lr) {
+ if (j >= 0) {
+ ds = dm; is = j; ys = 1;
+ }
+
+ ////////////////////////////////////////////////////////////
+ // search the SB of t
+ ////////////////////////////////////////////////////////////
+
+ i = SBfirst(t);
+ j = rmq_SB(b,i,t,opt,&dm);
+ d = depth(b,i) + dm;
+ if (opt & OPT_MAX) {
+ if (d + lr > ds) {
+ ds = d; is = j; ys = 0;
+ }
+ } else {
+ if (d < ds + lr) {
+ ds = d; is = j; ys = 0;
+ }
+ }
+
+ ////////////////////////////////////////////////////////////
+ // search the rest
+ ////////////////////////////////////////////////////////////
+
+ if (ys == 2) {
+ ss = SBid(is << logMB);
+ il = SBid(MBlast(is << logMB));
+ if (opt & OPT_MAX) {
+ dm = -n-1;
+ } else {
+ dm = n+1;
+ }
+ j = rmq_MB(b,ss,il,opt,&dm);
+ ds = dm; is = j; ys = 1;
+ }
+
+ if (ys == 1) {
+ ss = is << logSB;
+ il = SBlast(is << logSB);
+ is = rmq_SB(b,ss,il,opt,&dm);
+ //ds = depth(b,ss) + dm; ys = 0;
+ }
+
+ return is;
+}
+
--- /dev/null
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include "darray.h"\r
+\r
+//typedef unsigned char byte;\r
+//typedef unsigned short word;\r
+//typedef unsigned int dword;\r
+\r
+//typedef dword pb;\r
+#define logD 5\r
+\r
+#define PBS (sizeof(pb)*8)\r
+#define D (1<<logD)\r
+#define logM 5\r
+#define M (1<<logM)\r
+#define logP 8\r
+#define P (1<<logP)\r
+#define logLL 16 // size of word\r
+#define LL (1<<logLL)\r
+#define logLLL 7\r
+//#define logLLL 5\r
+#define LLL (1<<logLLL)\r
+//#define logL 10\r
+//#define logL (logLL-3)\r
+#define logL (logLL-1-5)\r
+#define L (1<<logL)\r
+\r
+#ifndef min\r
+ #define min(x,y) ((x)<(y)?(x):(y))\r
+#endif\r
+\r
+#define mymalloc(p,n,f) {p = (__typeof__(p)) malloc((n)*sizeof(*p)); if ((p)==NULL) {printf("not enough memory\n"); exit(1);}}\r
+\r
+int setbit(pb *B, int i,int x)\r
+{\r
+ int j,l;\r
+\r
+ j = i / D;\r
+ l = i % D;\r
+ if (x==0) B[j] &= (~(1<<(D-1-l)));\r
+ else if (x==1) B[j] |= (1<<(D-1-l));\r
+ else {\r
+ printf("error setbit x=%d\n",x);\r
+ exit(1);\r
+ }\r
+ return x;\r
+}\r
+\r
+int setbits(pb *B, int i, int d, int x)\r
+{\r
+ int j;\r
+\r
+ for (j=0; j<d; j++) {\r
+ setbit(B,i+j,(x>>(d-j-1))&1);\r
+ }\r
+ return x;\r
+}\r
+\r
+int getbit(pb *B, int i)\r
+{\r
+ int j,l;\r
+\r
+ //j = i / D;\r
+ //l = i % D;\r
+ j = i >> logD;\r
+ l = i & (D-1);\r
+ return (B[j] >> (D-1-l)) & 1;\r
+}\r
+\r
+dword getbits(pb *B, int i, int d)\r
+{\r
+ dword j,x;\r
+\r
+ x = 0;\r
+ for (j=0; j<d; j++) {\r
+ x <<= 1;\r
+ x += getbit(B,i+j);\r
+ }\r
+ return x;\r
+}\r
+\r
+int getpattern(pb *B, int i, int k, pb pat)\r
+{\r
+ int j;\r
+ int x;\r
+ x = 1;\r
+ for (j=0; j<k; j++) {\r
+ x &= getbit(B,i+j) ^ (~(pat>>(k-1-j)));\r
+ }\r
+ //printf("getpattern(%d) = %d\n",i,x);\r
+ return x;\r
+}\r
+\r
+\r
+static const unsigned int popCount[] = {\r
+0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,\r
+1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,\r
+1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,\r
+2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,\r
+1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,\r
+2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,\r
+2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,\r
+3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,\r
+1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,\r
+2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,\r
+2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,\r
+3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,\r
+2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,\r
+3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,\r
+3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,\r
+4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8\r
+};\r
+\r
+static int selecttbl[8*256];\r
+\r
+unsigned int popcount(pb x)\r
+{\r
+ pb r;\r
+#if 0\r
+ r = x;\r
+ r = r - ((r>>1) & 0x77777777) - ((r>>2) & 0x33333333) - ((r>>3) & 0x11111111);\r
+ r = ((r + (r>>4)) & 0x0f0f0f0f) % 0xff;\r
+#elif 1\r
+ r = x;\r
+ r = ((r & 0xaaaaaaaa)>>1) + (r & 0x55555555);\r
+ r = ((r & 0xcccccccc)>>2) + (r & 0x33333333);\r
+ //r = ((r & 0xf0f0f0f0)>>4) + (r & 0x0f0f0f0f);\r
+ r = ((r>>4) + r) & 0x0f0f0f0f;\r
+ //r = ((r & 0xff00ff00)>>8) + (r & 0x00ff00ff);\r
+ r = (r>>8) + r;\r
+ //r = ((r & 0xffff0000)>>16) + (r & 0x0000ffff);\r
+ r = ((r>>16) + r) & 63;\r
+#else\r
+ r = popCount[x & 0xff];\r
+ x >>= 8;\r
+ r += popCount[x & 0xff];\r
+ x >>= 8;\r
+ r += popCount[x & 0xff];\r
+ x >>= 8;\r
+ r += popCount[x & 0xff];\r
+#endif\r
+ return r;\r
+}\r
+\r
+unsigned int popcount8(pb x)\r
+{\r
+ dword r;\r
+#if 1\r
+ r = x;\r
+ r = ((r & 0xaa)>>1) + (r & 0x55);\r
+ r = ((r & 0xcc)>>2) + (r & 0x33);\r
+ r = ((r>>4) + r) & 0x0f;\r
+#else\r
+ r = popCount[x & 0xff];\r
+#endif\r
+ return r;\r
+}\r
+\r
+void make_selecttbl(void)\r
+{\r
+ int i,x,r;\r
+ pb buf[1];\r
+\r
+ for (x = 0; x < 256; x++) {\r
+ setbits(buf,0,8,x);\r
+ for (r=0; r<8; r++) selecttbl[(r<<8)+x] = -1;\r
+ r = 0;\r
+ for (i=0; i<8; i++) {\r
+ if (getbit(buf,i)) {\r
+ selecttbl[(r<<8)+x] = i;\r
+ r++;\r
+ }\r
+ }\r
+ }\r
+}\r
+\r
+\r
+int darray_construct(darray *da, int n, pb *buf, int opt)\r
+{\r
+ int i,j,k,m;\r
+ int nl;\r
+ int p,pp;\r
+ int il,is,ml,ms;\r
+ int r,m2;\r
+ dword *s;\r
+ int p1,p2,p3,p4,s1,s2,s3,s4;\r
+\r
+ da->idx_size = 0;\r
+\r
+ make_selecttbl();\r
+\r
+\r
+ if (L/LLL == 0) {\r
+ printf("ERROR: L=%d LLL=%d\n",L,LLL);\r
+ exit(1);\r
+ }\r
+\r
+ m = 0;\r
+ for (i=0; i<n; i++) m += getbit(buf,i);\r
+ da->n = n;\r
+ da->m = m;\r
+ //printf("n=%d m=%d\n",n,m);\r
+\r
+ da->buf = buf;\r
+\r
+ if (opt & (~OPT_NO_RANK)) { // construct select table\r
+#if 0\r
+ mymalloc(s,m,0);\r
+ m = 0;\r
+ for (i=0; i<n; i++) {\r
+ if (getbit(buf,i)) {\r
+ m++;\r
+ s[m-1] = i;\r
+ }\r
+ }\r
+#endif \r
+ nl = (m-1) / L + 1;\r
+ mymalloc(da->lp,nl+1,1); da->idx_size += (nl+1)*sizeof(*da->lp);\r
+ mymalloc(da->p,nl+1,1); da->idx_size += (nl+1)*sizeof(*da->p);\r
+#if 0\r
+ printf("lp table: %d bytes (%1.2f bpc)\n",(nl+1)*sizeof(*da->lp), (double)(nl+1)*sizeof(*da->lp) * 8/n);\r
+ printf("p table: %d bytes (%1.2f bpc)\n",(nl+1)*sizeof(*da->p), (double)(nl+1)*sizeof(*da->p) * 8/n);\r
+#endif \r
+\r
+ for (r = 0; r < 2; r++) {\r
+ s1 = s2 = s3 = s4 = 0;\r
+ p1 = p2 = p3 = p4 = -1;\r
+ \r
+ ml = ms = 0;\r
+ for (il = 0; il < nl; il++) {\r
+ //pp = s[il*L];\r
+ while (s1 <= il*L) {\r
+ if (getbit(buf,p1+1)) s1++;\r
+ p1++;\r
+ }\r
+ pp = p1;\r
+ da->lp[il] = pp;\r
+ i = min((il+1)*L-1,m-1);\r
+ //p = s[i];\r
+ while (s2 <= i) {\r
+ if (getbit(buf,p2+1)) s2++;\r
+ p2++;\r
+ }\r
+ p = p2;\r
+ //printf("%d ",p-pp);\r
+ if (p - pp >= LL) {\r
+ if (r == 1) {\r
+ for (is = 0; is < L; is++) {\r
+ if (il*L+is >= m) break;\r
+ //da->sl[ml*L+is] = s[il*L+is];\r
+ while (s3 <= il*L+is) {\r
+ if (getbit(buf,p3+1)) s3++;\r
+ p3++;\r
+ }\r
+ da->sl[ml*L+is] = p3;\r
+ }\r
+ }\r
+ da->p[il] = -(ml+1);\r
+ ml++;\r
+ } else {\r
+ if (r == 1) {\r
+ for (is = 0; is < L/LLL; is++) {\r
+ if (il*L+is*LLL >= m) break;\r
+ while (s4 <= il*L+is*LLL) {\r
+ if (getbit(buf,p4+1)) s4++;\r
+ p4++;\r
+ }\r
+ //da->ss[ms*(L/LLL)+is] = s[il*L+is*LLL] - pp;\r
+ da->ss[ms*(L/LLL)+is] = p4 - pp;\r
+ }\r
+ }\r
+ da->p[il] = ms;\r
+ ms++;\r
+ }\r
+ }\r
+ if (r == 0) {\r
+ mymalloc(da->sl,ml*L+1,1); da->idx_size += (ml*L+1)*sizeof(*da->sl);\r
+ mymalloc(da->ss,ms*(L/LLL)+1,1); da->idx_size += (ms*(L/LLL)+1)*sizeof(*da->ss);\r
+#if 0\r
+ printf("sl table: %d bytes (%1.2f bpc)\n",(ml*L+1)*sizeof(*da->sl), (double)(ml*L+1)*sizeof(*da->sl) * 8/n);\r
+ printf("ss table: %d bytes (%1.2f bpc)\n",(ms*(L/LLL)+1)*sizeof(*da->ss), (double)(ms*(L/LLL)+1)*sizeof(*da->ss) * 8/n);\r
+#endif\r
+ }\r
+ }\r
+ //free(s);\r
+ } else { // no select table\r
+ da->lp = NULL;\r
+ da->p = da->sl = NULL;\r
+ da->ss = NULL;\r
+ }\r
+\r
+ // construct rank table\r
+\r
+ if ((opt & OPT_NO_RANK) == 0) {\r
+ mymalloc(da->rl,n/R1+2,1); da->idx_size += (n/R1+2)*sizeof(*da->rl);\r
+ mymalloc(da->rm,n/RR+2,1); da->idx_size += (n/RR+2)*sizeof(*da->rm);\r
+ mymalloc(da->rs,n/RRR+2,1); da->idx_size += (n/RRR+2)*sizeof(*da->rs);\r
+#if 0\r
+ printf("rl table: %d bytes (%1.2f bpc)\n",(n/R1+2)*sizeof(*da->rl), (double)(n/R1+2)*sizeof(*da->rl) * 8/n);\r
+ printf("rm table: %d bytes (%1.2f bpc)\n",(n/RR+2)*sizeof(*da->rm), (double)(n/RR+2)*sizeof(*da->rm) * 8/n);\r
+ printf("rs table: %d bytes (%1.2f bpc)\n",(n/RRR+2)*sizeof(*da->rs), (double)(n/RRR+2)*sizeof(*da->rs) * 8/n);\r
+#endif\r
+ r = 0;\r
+ for (k=0; k<=n; k+=R1) {\r
+ da->rl[k/R1] = r;\r
+ m2 = 0;\r
+ for (i=0; i<R1; i+=RR) {\r
+ if (k+i <= n) da->rm[(k+i)/RR] = m2;\r
+ m = 0;\r
+ for (j=0; j<RR; j++) {\r
+ if (k+i+j < n && getbit(buf,k+i+j)==1) m++;\r
+ if (j % RRR == RRR-1) {\r
+ if (k+i+j <= n) da->rs[(k+i+j)/RRR] = m;\r
+ m2 += m;\r
+ m = 0;\r
+ }\r
+ }\r
+ if (m != 0) {\r
+ printf("???\n");\r
+ }\r
+ //m2 += m;\r
+ }\r
+ r += m2;\r
+ }\r
+ }\r
+\r
+ return 0;\r
+}\r
+\r
+// destroyDarray: frees the memory of darray\r
+// Added by Diego Arroyuelo\r
+void destroyDarray(darray *da) {\r
+ if (!da) return; // nothing to free\r
+ \r
+ if (da->buf) free(da->buf);\r
+ if (da->lp) free(da->lp);\r
+ if (da->sl) free(da->sl);\r
+ if (da->ss) free(da->ss);\r
+ if (da->p) free(da->p);\r
+ if (da->rl) free(da->rl);\r
+ if (da->rm) free(da->rm);\r
+ if (da->rs) free(da->rs);\r
+}\r
+\r
+int darray_rank0(darray *da, int i)\r
+{\r
+ int r,j;\r
+ pb *p;\r
+\r
+#if (RRR == D*2)\r
+ r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR];\r
+ p = da->buf + ((i>>logRRR)<<(logRRR-logD));\r
+ j = i & (RRR-1);\r
+ if (j < D) r += popcount(*p >> (D-1-j));\r
+ else r += popcount(*p) + popcount(p[1] >> (D-1-(j-D)));\r
+#else\r
+\r
+ j = i & (RRR-1);\r
+ if (j < RRR/2) {\r
+ r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR];\r
+ p = da->buf + ((i>>logRRR)<<(logRRR-logD));\r
+ while (j >= D) {\r
+ r += popcount(*p++);\r
+ j -= D;\r
+ }\r
+ r += popcount(*p >> (D-1-j));\r
+ } else {\r
+ j = RRR-1 - (i & (RRR-1));\r
+ i += j+1;\r
+ r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR];\r
+ p = da->buf + ((i>>logRRR)<<(logRRR-logD));\r
+ while (j >= D) {\r
+ r -= popcount(*--p);\r
+ j -= D;\r
+ }\r
+ if (j > 0) r -= popcount(*--p << (D-j));\r
+ }\r
+\r
+#endif\r
+\r
+ return r;\r
+}\r
+\r
+int darray_rank(darray *da, int i)\r
+{\r
+ int r,j;\r
+ pb *p;\r
+\r
+ r = da->rl[i>>logR] + da->rm[i>>logRR];\r
+ j = (i>>logRRR) & (RR/RRR-1);\r
+ while (j > 0) {\r
+ r += da->rs[((i>>logRR)<<(logRR-logRRR))+j-1];\r
+ j--;\r
+ }\r
+\r
+ p = da->buf + ((i>>logRRR)<<(logRRR-logD));\r
+ j = i & (RRR-1);\r
+ while (j >= D) {\r
+ r += popcount(*p++);\r
+ j -= D;\r
+ }\r
+ r += popcount(*p >> (D-1-j));\r
+\r
+ return r;\r
+}\r
+\r
+int darray_select_bsearch(darray *da, int i, pb (*getpat)(pb *))\r
+{\r
+ int j;\r
+ int l,r,m,n;\r
+ int s;\r
+ int t,x,rr;\r
+ pb *p,w;\r
+\r
+ // for debug\r
+ //s = darray_select(da,i,1);\r
+ //\r
+ //printf("select(%d)=%d\n",i,s);\r
+\r
+\r
+\r
+ if (i == 0) return -1;\r
+\r
+ if (i > da->m) {\r
+ return -1;\r
+ }\r
+\r
+ i--;\r
+\r
+\r
+\r
+ n = da->m;\r
+\r
+ t = i;\r
+\r
+ l = 0; r = (n-1)>>logR;\r
+ // find the smallest index x s.t. rl[x] >= t\r
+ while (l < r) {\r
+ m = (l+r)/2;\r
+ //printf("m=%d rl[m+1]=%d t=%d\n",m,da->rl[m+1],t);\r
+ if (da->rl[m+1] > t) { // m+1 is out of range\r
+ r = m; // new r = m >= l\r
+ } else {\r
+ l = m+1; // new l = m+1 <= r\r
+ }\r
+ }\r
+ x = l;\r
+ t -= da->rl[x];\r
+\r
+ x <<= logR;\r
+\r
+ l = x >> logRR; r = (min(x+R1-1,n))>>logRR;\r
+ while (l < r) {\r
+ m = (l+r)/2;\r
+ //printf("m=%d rm[m+1]=%d t=%d\n",m,da->rm[m+1],t);\r
+ if (da->rm[m+1] > t) { // m+1 is out of range\r
+ r = m;\r
+ } else {\r
+ l = m+1; // new l = m+1 <= r\r
+ }\r
+ }\r
+ x = l;\r
+ t -= da->rm[x];\r
+\r
+ x <<= logRR;\r
+\r
+#if 0\r
+ l = x >> logRRR; r = (min(x+RR-1,n))>>logRRR;\r
+ while (l < r) {\r
+ m = (l+r)/2;\r
+ //printf("m=%d rs[m+1]=%d t=%d\n",m,da->rs[m+1],t);\r
+ if (da->rs[m+1] > t) { // m+1 is out of range\r
+ r = m;\r
+ } else {\r
+ l = m+1; // new l = m+1 <= r\r
+ }\r
+ }\r
+ x = l;\r
+ t -= da->rs[x];\r
+#else\r
+ l = x >> logRRR;\r
+ while (t > da->rs[l]) {\r
+ t -= da->rs[l];\r
+ l++;\r
+ }\r
+ x = l;\r
+#endif\r
+\r
+ x <<= logRRR;\r
+\r
+ p = &da->buf[x >> logD];\r
+ while (1) {\r
+ m = popcount(getpat(p));\r
+ if (m > t) break;\r
+ t -= m;\r
+ x += D;\r
+ p++;\r
+ }\r
+\r
+ w = getpat(p);\r
+ while (1) {\r
+ rr = popCount[w >> (D-8)];\r
+ if (rr > t) break;\r
+ t -= rr;\r
+ x += 8;\r
+ w <<= 8;\r
+ }\r
+ x += selecttbl[((t-0)<<8)+(w>>(D-8))];\r
+\r
+#if 0\r
+ if (x != s) {\r
+ printf("error x=%d s=%d\n",x,s);\r
+ }\r
+#endif\r
+ return x;\r
+}\r
+\r
+int darray_pat_rank(darray *da, int i, pb (*getpat)(pb *))\r
+{\r
+ int r,j;\r
+ pb *p;\r
+\r
+ r = da->rl[i>>logR] + da->rm[i>>logRR];\r
+ j = (i>>logRRR) & (RR/RRR-1);\r
+ while (j > 0) {\r
+ r += da->rs[((i>>logRR)<<(logRR-logRRR))+j-1];\r
+ j--;\r
+ }\r
+\r
+ p = da->buf + ((i>>logRRR)<<(logRRR-logD));\r
+ j = i & (RRR-1);\r
+ while (j >= D) {\r
+ r += popcount(getpat(p));\r
+ p++;\r
+ j -= D;\r
+ }\r
+ r += popcount(getpat(p) >> (D-1-j));\r
+\r
+ return r;\r
+}\r
+\r
+\r
+int darray_select(darray *da, int i,int f)\r
+{\r
+ int p,r;\r
+ int il;\r
+ int rr;\r
+ pb x;\r
+ pb *q;\r
+\r
+ if (i == 0) return -1;\r
+\r
+ if (i > da->m) {\r
+ return -1;\r
+ //printf("ERROR: m=%d i=%d\n",da->m,i);\r
+ //exit(1);\r
+ }\r
+\r
+ i--;\r
+\r
+ il = da->p[i>>logL];\r
+ if (il < 0) {\r
+ il = -il-1;\r
+ p = da->sl[(il<<logL)+(i & (L-1))];\r
+ } else {\r
+ p = da->lp[i>>logL];\r
+ p += da->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL];\r
+ r = i - (i & (LLL-1));\r
+\r
+ q = &(da->buf[p>>logD]);\r
+\r
+ if (f == 1) {\r
+ rr = p & (D-1);\r
+ r -= popcount(*q >> (D-1-rr));\r
+ p = p - rr;\r
+ \r
+ while (1) {\r
+ rr = popcount(*q);\r
+ if (r + rr >= i) break;\r
+ r += rr;\r
+ p += D;\r
+ q++;\r
+ }\r
+ \r
+ x = *q;\r
+ while (1) {\r
+ //rr = popcount(x >> (D-8));\r
+ rr = popCount[x >> (D-8)];\r
+ //rr = popcount8(x >> (D-8));\r
+ if (r + rr >= i) break;\r
+ r += rr;\r
+ p += 8;\r
+ x <<= 8;\r
+ }\r
+ p += selecttbl[((i-r-1)<<8)+(x>>(D-8))];\r
+ } else {\r
+ rr = p & (D-1);\r
+ r -= popcount((~(*q)) >> (D-1-rr));\r
+ p = p - rr;\r
+ \r
+ while (1) {\r
+ rr = popcount(~(*q));\r
+ if (r + rr >= i) break;\r
+ r += rr;\r
+ p += D;\r
+ q++;\r
+ }\r
+ \r
+ x = ~(*q);\r
+\r
+ while (1) {\r
+ //rr = popcount(x >> (D-8));\r
+ rr = popCount[x >> (D-8)];\r
+ //rr = popcount8(x >> (D-8));\r
+ if (r + rr >= i) break;\r
+ r += rr;\r
+ p += 8;\r
+ x <<= 8;\r
+ }\r
+ p += selecttbl[((i-r-1)<<8)+(x>>(D-8))];\r
+ }\r
+ }\r
+ return p;\r
+}\r
+\r
+int darray_pat_select(darray *da, int i, pb (*getpat)(pb *))\r
+{\r
+ int p,r;\r
+ int il;\r
+ int rr;\r
+ pb x;\r
+ pb *q;\r
+\r
+ if (i == 0) return -1;\r
+\r
+ if (i > da->m) {\r
+ return -1;\r
+ //printf("ERROR: m=%d i=%d\n",da->m,i);\r
+ //exit(1);\r
+ }\r
+\r
+ i--;\r
+\r
+ il = da->p[i>>logL];\r
+ if (il < 0) {\r
+ il = -il-1;\r
+ p = da->sl[(il<<logL)+(i & (L-1))];\r
+ } else {\r
+ p = da->lp[i>>logL];\r
+ p += da->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL];\r
+ r = i - (i & (LLL-1));\r
+\r
+ q = &(da->buf[p>>logD]);\r
+\r
+ rr = p & (D-1);\r
+ r -= popcount(getpat(q) >> (D-1-rr));\r
+ p = p - rr;\r
+ \r
+ while (1) {\r
+ rr = popcount(getpat(q));\r
+ if (r + rr >= i) break;\r
+ r += rr;\r
+ p += D;\r
+ q++;\r
+ }\r
+ \r
+ x = getpat(q);\r
+ while (1) {\r
+ //rr = popcount(x >> (D-8));\r
+ rr = popCount[x >> (D-8)];\r
+ //rr = popcount8(x >> (D-8));\r
+ if (r + rr >= i) break;\r
+ r += rr;\r
+ p += 8;\r
+ x <<= 8;\r
+ }\r
+ p += selecttbl[((i-r-1)<<8)+(x>>(D-8))];\r
+ }\r
+ return p;\r
+}\r
+\r
+int darray_pat_construct(darray *da, int n, pb *buf, int k, pb pat, int opt)\r
+{\r
+ int i;\r
+ pb *b;\r
+ mymalloc(b,(n+D-1)/D,0);\r
+\r
+ for (i=0; i<n-k+1; i++) {\r
+ setbit(b,i,getpattern(buf,i,k,pat));\r
+ }\r
+ for (i=n-k+1; i<n; i++) {\r
+ setbit(b,i,0);\r
+ }\r
+\r
+ darray_construct(da,n,b,opt);\r
+ da->buf = buf;\r
+\r
+ free(b);\r
+ \r
+ return 0;\r
+}\r
--- /dev/null
+typedef unsigned char byte;
+typedef unsigned short word;
+typedef unsigned int dword;
+
+#define OPT_NO_RANK (1<<30)
+
+
+typedef dword pb;
+
+#define logD 5
+#define D (1<<logD)
+
+#define logR 16
+#define R1 (1<<logR)
+#define logRR 10
+//#define logRR 8
+#define RR (1<<logRR)
+#define logRRR 7
+#define RRR (1<<logRRR)
+
+typedef struct {
+ int n,m;
+ int size;
+ pb *buf;
+ dword *lp;
+ dword *sl;
+ word *ss;
+ dword *p;
+
+ dword *rl;
+ word *rm;
+ byte *rs;
+
+ int idx_size;
+} darray;
+
+int setbit(pb *B, int i,int x);
+int setbits(pb *B, int i, int d, int x);
+int getbit(pb *B, int i);
+dword getbits(pb *B, int i, int d);
+unsigned int popcount(pb x);
+
+int darray_construct(darray *da, int n, pb *buf,int opt);
+int darray_select(darray *da, int i,int f);
+int darray_rank(darray *da, int i);
+int darray_pat_construct(darray *da, int n, pb *buf, int k, pb pat, int opt);
+int darray_pat_select(darray *da, int i, pb (*getpat)(pb *));
+int darray_pat_rank(darray *da, int i, pb (*getpat)(pb *));
+
+int darray_select_bsearch(darray *da, int i, pb (*getpat)(pb *));
+
+// Added by Diego Arroyuelo
+void destroyDarray(darray *da);
--- /dev/null
+# Doxyfile 1.5.5
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+DOXYFILE_ENCODING = UTF-8
+PROJECT_NAME = libcds
+PROJECT_NUMBER = 1.0
+OUTPUT_DIRECTORY = docs/
+CREATE_SUBDIRS = NO
+OUTPUT_LANGUAGE = English
+BRIEF_MEMBER_DESC = YES
+REPEAT_BRIEF = YES
+ABBREVIATE_BRIEF = "The $name class" \
+ "The $name widget" \
+ "The $name file" \
+ is \
+ provides \
+ specifies \
+ contains \
+ represents \
+ a \
+ an \
+ the
+ALWAYS_DETAILED_SEC = NO
+INLINE_INHERITED_MEMB = NO
+FULL_PATH_NAMES = YES
+STRIP_FROM_PATH = src/
+STRIP_FROM_INC_PATH =
+SHORT_NAMES = NO
+JAVADOC_AUTOBRIEF = NO
+QT_AUTOBRIEF = NO
+MULTILINE_CPP_IS_BRIEF = NO
+DETAILS_AT_TOP = NO
+INHERIT_DOCS = YES
+SEPARATE_MEMBER_PAGES = NO
+TAB_SIZE = 2
+ALIASES =
+OPTIMIZE_OUTPUT_FOR_C = NO
+OPTIMIZE_OUTPUT_JAVA = NO
+OPTIMIZE_FOR_FORTRAN = NO
+OPTIMIZE_OUTPUT_VHDL = NO
+BUILTIN_STL_SUPPORT = NO
+CPP_CLI_SUPPORT = NO
+SIP_SUPPORT = NO
+DISTRIBUTE_GROUP_DOC = NO
+SUBGROUPING = YES
+TYPEDEF_HIDES_STRUCT = NO
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+EXTRACT_ALL = YES
+EXTRACT_PRIVATE = YES
+EXTRACT_STATIC = YES
+EXTRACT_LOCAL_CLASSES = YES
+EXTRACT_LOCAL_METHODS = NO
+EXTRACT_ANON_NSPACES = NO
+HIDE_UNDOC_MEMBERS = NO
+HIDE_UNDOC_CLASSES = NO
+HIDE_FRIEND_COMPOUNDS = NO
+HIDE_IN_BODY_DOCS = NO
+INTERNAL_DOCS = NO
+CASE_SENSE_NAMES = YES
+HIDE_SCOPE_NAMES = NO
+SHOW_INCLUDE_FILES = YES
+INLINE_INFO = YES
+SORT_MEMBER_DOCS = YES
+SORT_BRIEF_DOCS = NO
+SORT_GROUP_NAMES = NO
+SORT_BY_SCOPE_NAME = NO
+GENERATE_TODOLIST = YES
+GENERATE_TESTLIST = YES
+GENERATE_BUGLIST = YES
+GENERATE_DEPRECATEDLIST= YES
+ENABLED_SECTIONS =
+MAX_INITIALIZER_LINES = 30
+SHOW_USED_FILES = YES
+SHOW_DIRECTORIES = NO
+FILE_VERSION_FILTER =
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+QUIET = NO
+WARNINGS = YES
+WARN_IF_UNDOCUMENTED = YES
+WARN_IF_DOC_ERROR = YES
+WARN_NO_PARAMDOC = NO
+WARN_FORMAT = "$file:$line: $text"
+WARN_LOGFILE =
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+INPUT = src
+INPUT_ENCODING = UTF-8
+FILE_PATTERNS = *.c \
+ *.cc \
+ *.cxx \
+ *.cpp \
+ *.c++ \
+ *.d \
+ *.java \
+ *.ii \
+ *.ixx \
+ *.ipp \
+ *.i++ \
+ *.inl \
+ *.h \
+ *.hh \
+ *.hxx \
+ *.hpp \
+ *.h++ \
+ *.idl \
+ *.odl \
+ *.cs \
+ *.php \
+ *.php3 \
+ *.inc \
+ *.m \
+ *.mm \
+ *.dox \
+ *.py \
+ *.f90 \
+ *.f \
+ *.vhd \
+ *.vhdl \
+ *.C \
+ *.CC \
+ *.C++ \
+ *.II \
+ *.I++ \
+ *.H \
+ *.HH \
+ *.H++ \
+ *.CS \
+ *.PHP \
+ *.PHP3 \
+ *.M \
+ *.MM \
+ *.PY \
+ *.F90 \
+ *.F \
+ *.VHD \
+ *.VHDL
+RECURSIVE = YES
+EXCLUDE =
+EXCLUDE_SYMLINKS = NO
+EXCLUDE_PATTERNS =
+EXCLUDE_SYMBOLS =
+EXAMPLE_PATH =
+EXAMPLE_PATTERNS = *
+EXAMPLE_RECURSIVE = NO
+IMAGE_PATH =
+INPUT_FILTER =
+FILTER_PATTERNS =
+FILTER_SOURCE_FILES = NO
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+SOURCE_BROWSER = YES
+INLINE_SOURCES = NO
+STRIP_CODE_COMMENTS = YES
+REFERENCED_BY_RELATION = NO
+REFERENCES_RELATION = NO
+REFERENCES_LINK_SOURCE = YES
+USE_HTAGS = NO
+VERBATIM_HEADERS = NO
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+ALPHABETICAL_INDEX = NO
+COLS_IN_ALPHA_INDEX = 5
+IGNORE_PREFIX =
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+GENERATE_HTML = YES
+HTML_OUTPUT = html
+HTML_FILE_EXTENSION = .html
+HTML_HEADER =
+HTML_FOOTER =
+HTML_STYLESHEET =
+HTML_ALIGN_MEMBERS = YES
+GENERATE_HTMLHELP = NO
+GENERATE_DOCSET = NO
+DOCSET_FEEDNAME = "Doxygen generated docs"
+DOCSET_BUNDLE_ID = org.doxygen.Project
+HTML_DYNAMIC_SECTIONS = NO
+CHM_FILE =
+HHC_LOCATION =
+GENERATE_CHI = NO
+BINARY_TOC = NO
+TOC_EXPAND = NO
+DISABLE_INDEX = NO
+ENUM_VALUES_PER_LINE = 4
+GENERATE_TREEVIEW = NO
+TREEVIEW_WIDTH = 250
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+GENERATE_LATEX = YES
+LATEX_OUTPUT = latex
+LATEX_CMD_NAME = latex
+MAKEINDEX_CMD_NAME = makeindex
+COMPACT_LATEX = NO
+PAPER_TYPE = a4wide
+EXTRA_PACKAGES =
+LATEX_HEADER =
+PDF_HYPERLINKS = YES
+USE_PDFLATEX = YES
+LATEX_BATCHMODE = NO
+LATEX_HIDE_INDICES = NO
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+GENERATE_RTF = NO
+RTF_OUTPUT = rtf
+COMPACT_RTF = NO
+RTF_HYPERLINKS = NO
+RTF_STYLESHEET_FILE =
+RTF_EXTENSIONS_FILE =
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+GENERATE_MAN = NO
+MAN_OUTPUT = man
+MAN_EXTENSION = .3
+MAN_LINKS = NO
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+GENERATE_XML = NO
+XML_OUTPUT = xml
+XML_SCHEMA =
+XML_DTD =
+XML_PROGRAMLISTING = YES
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+GENERATE_AUTOGEN_DEF = NO
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+GENERATE_PERLMOD = NO
+PERLMOD_LATEX = NO
+PERLMOD_PRETTY = YES
+PERLMOD_MAKEVAR_PREFIX =
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+ENABLE_PREPROCESSING = YES
+MACRO_EXPANSION = NO
+EXPAND_ONLY_PREDEF = NO
+SEARCH_INCLUDES = YES
+INCLUDE_PATH =
+INCLUDE_FILE_PATTERNS =
+PREDEFINED =
+EXPAND_AS_DEFINED =
+SKIP_FUNCTION_MACROS = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+TAGFILES =
+GENERATE_TAGFILE =
+ALLEXTERNALS = NO
+EXTERNAL_GROUPS = YES
+PERL_PATH = /usr/bin/perl
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+CLASS_DIAGRAMS = NO
+MSCGEN_PATH =
+HIDE_UNDOC_RELATIONS = YES
+HAVE_DOT = YES
+CLASS_GRAPH = YES
+COLLABORATION_GRAPH = YES
+GROUP_GRAPHS = YES
+UML_LOOK = NO
+TEMPLATE_RELATIONS = NO
+INCLUDE_GRAPH = YES
+INCLUDED_BY_GRAPH = YES
+CALL_GRAPH = YES
+CALLER_GRAPH = NO
+GRAPHICAL_HIERARCHY = YES
+DIRECTORY_GRAPH = YES
+DOT_IMAGE_FORMAT = png
+DOT_PATH =
+DOTFILE_DIRS =
+DOT_GRAPH_MAX_NODES = 50
+MAX_DOT_GRAPH_DEPTH = 1000
+DOT_TRANSPARENT = YES
+DOT_MULTI_TARGETS = NO
+GENERATE_LEGEND = YES
+DOT_CLEANUP = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+SEARCHENGINE = NO
--- /dev/null
+
+all: libcompact tests
+
+
+doc:
+ doxygen
+
+libcompact:
+ make -C src
+
+tests: libcompact
+ make -C tests
+
+clean:
+ make -C src clean
+ make -C tests clean
+ rm -rf docs/*
+ touch docs/delete_me
+ rm -f lib/*
+ touch lib/delete_me
+ rm -f includes/*
+ touch includes/delete_me
+
+
--- /dev/null
+CPP=g++
+
+CPPFLAGS=-g3 -Wall
+#CPPFLAGS=-O9 -Wall -DNDEBUG -pedantic
+
+INCL=-I../includes/
+
+CODERS_DIR=coders
+CODERS_OBJECTS=$(CODERS_DIR)/huff.o $(CODERS_DIR)/huffman_codes.o
+
+STATIC_BITSEQUENCE_DIR=static_bitsequence
+STATIC_BITSEQUENCE_OBJECTS=$(STATIC_BITSEQUENCE_DIR)/static_bitsequence.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_naive.o $(STATIC_BITSEQUENCE_DIR)/table_offset.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_brw32.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_brw32.o
+
+STATIC_SEQUENCE_DIR=static_sequence
+STATIC_SEQUENCE_OBJECTS=$(STATIC_SEQUENCE_DIR)/static_sequence.o $(STATIC_SEQUENCE_DIR)/static_sequence_wvtree.o $(STATIC_SEQUENCE_DIR)/wt_coder_binary.o $(STATIC_SEQUENCE_DIR)/wt_coder_huff.o $(STATIC_SEQUENCE_DIR)/wt_node_internal.o $(STATIC_SEQUENCE_DIR)/wt_node_leaf.o $(STATIC_SEQUENCE_DIR)/wt_coder.o $(STATIC_SEQUENCE_DIR)/wt_node.o
+
+UTILS_DIR=utils
+UTILS_OBJECTS=$(UTILS_DIR)/alphabet_mapper_none.o $(UTILS_DIR)/alphabet_mapper.o
+
+%.o: %.cpp
+ $(CPP) $(CPPFLAGS) $(INCL) -c $< -o $@
+
+all: lib
+
+clean:
+ rm -f $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS)
+
+lib: pre $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS)
+ ar vrcs ../lib/libcds.a $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS)
+
+pre:
+ cp basics.h ../includes/
+ cp */*.h ../includes/
+
--- /dev/null
+/* basics.h
+ * Copyright (C) 2008, Rodrigo Gonzalez & Francisco Claude, all rights reserved.
+ *
+ * Some preliminary stuff
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+
+#ifndef _BASICS_H
+#define _BASICS_H
+
+#include <iostream>
+using namespace std;
+#include <cstdlib>
+#include <cmath>
+
+
+/** mask for obtaining the first 5 bits */
+#define mask31 0x0000001F
+
+/** max function */
+#define max(x,y) ((x)>(y)?(x):(y))
+/** min function */
+#define min(x,y) ((x)<(y)?(x):(y))
+
+
+/** number of bits in a uint */
+#define W 32
+
+/** W-1 */
+#define Wminusone 31
+
+/** 2W*/
+#define WW 64
+
+/** number of bits per uchar */
+#define bitsM 8
+
+/** number of bytes per uint */
+#define BW 4
+
+/** uchar = unsigned char */
+#ifndef uchar
+#define uchar unsigned char
+#endif
+
+/** ushort = unsigned short */
+#ifndef ushort
+#define ushort unsigned short
+#endif
+
+/** ulong = unsigned long */
+#ifndef ulong
+#define ulong unsigned long
+#endif
+
+/** uint = unsigned int */
+#ifndef uint
+#define uint unsigned int
+#endif
+
+/** number of different uchar values 0..255 */
+#define size_uchar 256
+
+/** popcount array for uchars */
+const unsigned char __popcount_tab[] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
+
+/** select array for uchars */
+const unsigned char select_tab[] = {
+ 0, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,
+ 6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,
+ 7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,
+ 6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,
+ 8, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,
+ 6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,
+ 7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,
+ 6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,
+};
+
+/** prev array for uchars */
+const unsigned char prev_tab[] = {
+ 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+};
+
+
+
+/** bits needed to represent a number between 0 and n */
+inline uint bits(uint n){
+ uint b = 0;
+ while (n) { b++; n >>= 1; }
+ return b;
+}
+
+/** reads bit p from e */
+#define bitget(e,p) ((((e)[(p)/W] >> ((p)%W))) & 1)
+
+/** sets bit p in e */
+#define bitset(e,p) ((e)[(p)/W] |= (1<<((p)%W)))
+
+/** cleans bit p in e */
+#define bitclean(e,p) ((e)[(p)/W] &= ~(1<<((p)%W)))
+
+/** uints required to represent e integers of n bits each */
+#define uint_len(e,n) (((e)*(n))/W+(((e)*(n))%W > 0))
+
+/** Retrieve a given index from array A where every value uses len bits
+ * @param A Array
+ * @param len Length in bits of each field
+ * @param index Position to be retrieved
+ */
+inline uint get_field(uint *A, uint len, uint index) {
+ if(len==0) return 0;
+ register uint i=index*len/W, j=index*len-W*i, result;
+ if (j+len <= W)
+ result = (A[i] << (W-j-len)) >> (W-len);
+ else {
+ result = A[i] >> j;
+ result = result | (A[i+1] << (WW-j-len)) >> (W-len);
+ }
+ return result;
+}
+
+/** Store a given value in index into array A where every value uses len bits
+ * @param A Array
+ * @param len Length in bits of each field
+ * @param index Position to store in
+ * @param x Value to be stored
+ */
+inline void set_field(uint *A, uint len, uint index, uint x) {
+ if(len==0) return;
+ uint i=index*len/W, j=index*len-i*W;
+ uint mask = ((j+len) < W ? ~0u << (j+len) : 0)
+ | ((W-j) < W ? ~0u >> (W-j) : 0);
+ A[i] = (A[i] & mask) | x << j;
+ if (j+len>W) {
+ mask = ((~0u) << (len+j-W));
+ A[i+1] = (A[i+1] & mask)| x >> (W-j);
+ }
+}
+
+/** Retrieve a given bitsequence from array A
+ * @param A Array
+ * @param ini Starting position
+ * @param fin Retrieve until end-1
+ */
+inline uint get_var_field(uint *A, uint ini, uint fin) {
+ if(ini==fin+1) return 0;
+ uint i=ini/W, j=ini-W*i, result;
+ uint len = (fin-ini+1);
+ if (j+len <= W)
+ result = (A[i] << (W-j-len)) >> (W-len);
+ else {
+ result = A[i] >> j;
+ result = result | (A[i+1] << (WW-j-len)) >> (W-len);
+ }
+ return result;
+}
+
+/** Stores a given bitsequence into array A
+ * @param A Array
+ * @param ini Starting position
+ * @param fin Store until end-1
+ * @param x Value to be stored
+ */
+inline void set_var_field(uint *A, uint ini, uint fin, uint x) {
+ if(ini==fin+1) return;
+ uint i=ini/W, j=ini-i*W;
+ uint len = (fin-ini+1);
+ uint mask = ((j+len) < W ? ~0u << (j+len) : 0)
+ | ((W-j) < W ? ~0u >> (W-j) : 0);
+ A[i] = (A[i] & mask) | x << j;
+ if (j+len>W) {
+ mask = ((~0u) << (len+j-W));
+ A[i+1] = (A[i+1] & mask)| x >> (W-j);
+ }
+}
+
+/** Retrieve a given index from array A where every value uses 4 bits
+ * @param A Array
+ * @param index Position to be retrieved
+ */
+inline uint get_field4(uint *A, uint index) {
+ unsigned i=index/8, j=(index&0x7)<<2;
+ return (A[i] << (28-j)) >> (28);
+}
+
+/** Counts the number of 1s in x */
+inline uint popcount(int x){
+ return __popcount_tab[(x >> 0) & 0xff] + __popcount_tab[(x >> 8) & 0xff]
+ + __popcount_tab[(x >> 16) & 0xff] + __popcount_tab[(x >> 24) & 0xff];
+}
+
+/** Counts the number of 1s in the first 16 bits of x */
+inline uint popcount16(int x){
+ return __popcount_tab[x & 0xff] + __popcount_tab[(x >> 8) & 0xff];
+}
+
+/** Counts the number of 1s in the first 8 bits of x */
+inline uint popcount8(int x){
+ return __popcount_tab[x & 0xff];
+}
+
+#endif /* _BASICS_H */
+
--- /dev/null
+
+// implements canonical Huffman
+
+#include <huff.h>
+
+typedef struct
+ { uint freq;
+ uint symb;
+ union
+ { int prev;
+ uint depth;
+ } h;
+ int ch1,ch2;
+ } Ttree;
+
+static void sort (Ttree *tree, int lo, int up)
+
+ { uint i, j;
+ Ttree temp;
+ while (up>lo)
+ { i = lo;
+ j = up;
+ temp = tree[lo];
+ while (i<j)
+ { while (tree[j].freq > temp.freq) j--;
+ tree[i] = tree[j];
+ while (i<j && tree[i].freq <= temp.freq) i++;
+ tree[j] = tree[i];
+ }
+ tree[i] = temp;
+ if (i-lo < up-i) { sort(tree,lo,i-1); lo = i+1; }
+ else { sort(tree,i+1,up); up = i-1; }
+ }
+ }
+
+static void setdepths (Ttree *tree, uint node, int depth)
+
+ { if (tree[node].ch1 == -1) // leaf
+ { tree[node].h.depth = depth;
+ return;
+ }
+ setdepths (tree,tree[node].ch1,depth+1);
+ setdepths (tree,tree[node].ch2,depth+1);
+ }
+
+THuff createHuff (uint *freq, uint lim)
+
+ { THuff H;
+ int i,j,d;
+ Ttree *tree;
+ uint ptr,last,fre;
+ // remove zero frequencies
+ H.max = lim;
+ tree = (Ttree*)malloc((2*(lim+1)-1)*sizeof(Ttree));
+ j = 0;
+ for (i=0;i<=(int)lim;i++)
+ { if (freq[i]>0)
+ { tree[j].freq = freq[i];
+ tree[j].symb = i;
+ j++;
+ }
+ }
+ H.lim = lim = j-1;
+ // now run Huffman algorithm
+ sort (tree,0,lim);
+ for (i=0;i<=(int)lim;i++)
+ { tree[i].h.prev = i+1;
+ tree[i].ch1 = tree[i].ch2 = -1;
+ }
+ tree[lim].h.prev = -1;
+ // last = next node to process, ptr = search point, fre = next free cell
+ // leaves are in 0..lim in decreasing freq order
+ // internal nodes are in lim+1.. 2*lim, created in incr. fre order
+ last=0; ptr = 0; fre = lim+1;
+ for (i=0;i<(int)lim;i++)
+ { tree[fre].ch1 = last;
+ last = tree[last].h.prev;
+ tree[fre].ch2 = last;
+ tree[fre].freq = tree[tree[fre].ch1].freq+tree[tree[fre].ch2].freq;
+ while ((tree[ptr].h.prev != -1) &&
+ (tree[tree[ptr].h.prev].freq <= tree[fre].freq))
+ ptr = tree[ptr].h.prev;
+ tree[fre].h.prev = tree[ptr].h.prev;
+ tree[ptr].h.prev = fre;
+ last = tree[last].h.prev;
+ fre++;
+ }
+ // now assign depths recursively
+ setdepths (tree,2*lim,0);
+ H.s.spos = (uint*)malloc ((H.max+1)*sizeof(uint));
+ for (i=0;i<=(int)H.max;i++) H.s.spos[i] = ~0;
+ H.num = (uint*)malloc ((lim+1)*sizeof(uint)); // max possible depth
+ d=0;
+ for (i=lim;i>=0;i--)
+ { H.s.spos[tree[i].symb] = i;
+ while ((int)tree[i].h.depth > d)
+ { H.num[d] = i+1; d++; }
+ }
+ H.num[d] = 0;
+ H.depth = d;
+ for (d=H.depth;d>0;d--) H.num[d] = H.num[d-1] - H.num[d];
+ H.num[0] = (lim == 0);
+ H.num = (uint*)realloc(H.num,(H.depth+1)*sizeof(uint));
+ H.total = 0;
+ for (i=0;i<=(int)lim;i++)
+ H.total += freq[tree[i].symb] * tree[i].h.depth;
+ free (tree);
+ return H;
+ }
+
+void bitzero (register uint *e, register uint p,
+ register uint len)
+
+ { e += p/W; p %= W;
+ if (p+len >= W)
+ { *e &= ~((1<<p)-1);
+ len -= p;
+ e++; p = 0;
+ }
+ while (len >= W)
+ { *e++ = 0;
+ len -= W;
+ }
+ if (len > 0)
+ *e &= ~(((1<<len)-1)<<p);
+ }
+
+ulong encodeHuff (THuff H, uint symb, uint *stream, ulong ptr)
+
+ { uint pos;
+ uint code;
+ uint d;
+ pos = H.s.spos[symb];
+ code = 0;
+ d = H.depth;
+ while (pos >= H.num[d])
+ { code = (code + H.num[d]) >> 1;
+ pos -= H.num[d--];
+ }
+ code += pos;
+ if (d > W) { bitzero(stream,ptr,d-W); ptr += d-W; d = W; }
+ while (d--)
+ { if ((code >> d) & 1) bitset(stream,ptr);
+ else bitclean(stream,ptr);
+ ptr++;
+ }
+ return ptr;
+ }
+
+ulong decodeHuff (THuff H, uint *symb, uint *stream, ulong ptr)
+
+ { uint pos;
+ uint d;
+ pos = 0;
+ d = 0;
+ while (pos < H.fst[d])
+ { pos = (pos << 1) | bitget(stream,ptr);
+ ptr++; d++;
+ }
+ *symb = H.s.symb[H.num[d]+pos-H.fst[d]];
+ return ptr;
+ }
+
+/* { uint pos; // This "improved" code is actually slower!
+ int d;
+ uint wrd,off;
+ stream += ptr/W;
+ off = ptr & (W-1);
+ wrd = *stream >> off;
+ pos = 0;
+ d = 0;
+ while (pos < H.fst[d])
+ { pos = (pos << 1) | (wrd & 1);
+ d++; wrd >>= 1; off++;
+ if (off == W) { wrd = *++stream; off = 0; }
+ }
+ *symb = H.s.symb[H.num[d]+pos-H.fst[d]];
+ return ptr+d;
+ }
+*/
+void saveHuff (THuff H, FILE *f)
+
+ { uint *symb = (uint*)malloc((H.lim+1)*sizeof(uint));
+ uint i;
+ for (i=0;i<=H.max;i++)
+ if (H.s.spos[i] != (uint)~0) symb[H.s.spos[i]] = i;
+ uint l=fwrite (&H.max,sizeof(uint),1,f);
+ l += fwrite (&H.lim,sizeof(uint),1,f);
+ l += fwrite (&H.depth,sizeof(uint),1,f);
+ l += fwrite (symb,sizeof(uint),H.lim+1,f);
+ l += fwrite (H.num,sizeof(uint),H.depth+1,f);
+ free (symb);
+ }
+
+uint sizeHuff (THuff H)
+
+ { return (4+(H.lim+1)+(H.depth+1))*sizeof(uint);
+ }
+
+void freeHuff (THuff H)
+
+ { free (H.s.spos); free (H.num);
+ }
+
+THuff loadHuff (FILE *f, int enc)
+
+ { THuff H;
+ uint *symb;
+ //uint *num;
+ uint i,d,dold,dact;
+ uint l = fread (&H.max,sizeof(uint),1,f);
+ l += fread (&H.lim,sizeof(uint),1,f);
+ l += fread (&H.depth,sizeof(uint),1,f);
+ symb = (uint*)malloc ((H.lim+1)*sizeof(uint));
+ l += fread (symb,sizeof(uint),H.lim+1,f);
+ if (enc)
+ { H.s.spos = (uint*)malloc ((H.max+1)*sizeof(uint));
+ for (i=0;i<=H.max;i++) H.s.spos[i] = (uint)~0;
+ for (i=0;i<=H.lim;i++) H.s.spos[symb[i]] = i;
+ free (symb);
+ }
+ else H.s.symb = symb;
+ H.num = (uint*)malloc ((H.depth+1)*sizeof(uint));
+ l += fread (H.num,sizeof(uint),H.depth+1,f);
+ if (!enc)
+ { H.fst = (uint*)malloc ((H.depth+1)*sizeof(uint));
+ H.fst[H.depth] = 0; dold = 0;
+ for (d=H.depth-1;d>=0;d--)
+ { dact = H.num[d+1];
+ H.fst[d] = (H.fst[d+1]+dact) >> 1;
+ H.num[d+1] = dold;
+ dold += dact;
+ }
+ H.num[0] = dold;
+ }
+ return H;
+ }
+
+
--- /dev/null
+
+// implements canonical Huffman
+
+#ifndef HUFFINCLUDED
+#define HUFFINCLUDED
+
+#include <basics.h>
+
+typedef struct
+ { uint max,lim; // maximum symbol (0..max), same excluding zero freqs
+ uint depth; // max symbol length
+ union
+ { uint *spos; // symbol positions after sorting by decr freq (enc)
+ uint *symb; // symbols sorted by freq (dec)
+ } s;
+ uint *num; // first pos of each length (dec), number of each length (enc)
+ uint *fst; // first code (numeric) of each length (dec)
+ ulong total; // total length to achieve, in bits
+ } THuff;
+
+ // Creates Huffman encoder given symbols 0..lim with frequencies
+ // freq[i], ready for compression
+
+THuff createHuff (uint *freq, uint lim);
+
+ // Encodes symb using H, over stream[ptr...lim] (ptr and lim are
+ // bit positions of stream). Returns the new ptr.
+
+ulong encodeHuff (THuff H, uint symb, uint *stream, ulong ptr);
+
+ // Decodes *symb using H, over stream[ptr...lim] (ptr and lim are
+ // bit positions of stream). Returns the new ptr.
+
+ulong decodeHuff (THuff H, uint *symb, uint *stream, ulong ptr);
+
+ // Writes H in file f
+
+void saveHuff (THuff H, FILE *f);
+
+ // Size of H written on file
+
+uint sizeHuff (THuff H);
+
+ // Frees H
+
+void freeHuff (THuff H);
+
+ // Loads H from file f, prepared for encoding or decoding depending
+ // on enc
+
+THuff loadHuff (FILE *f, int enc);
+
+#endif
--- /dev/null
+
+#include <huffman_codes.h>
+
+huffman_codes::huffman_codes(uint * symb, uint n) {
+ uint max_v = 0;
+ for(uint i=0;i<n;i++)
+ max_v = max(max_v,symb[i]);
+ uint * occ = new uint[max_v+1];
+ for(uint i=0;i<max_v+1;i++)
+ occ[i] = 0;
+ for(uint i=0;i<n;i++)
+ occ[symb[i]]++;
+ huff_table = createHuff(occ, max_v);
+ delete [] occ;
+}
+
+huffman_codes::huffman_codes() {
+}
+
+huffman_codes::~huffman_codes() {
+ freeHuff(huff_table);
+}
+
+uint huffman_codes::max_length() {
+ return huff_table.depth;
+}
+
+uint huffman_codes::size() {
+ return sizeof(huffman_codes)+sizeHuff(huff_table);
+}
+
+ulong huffman_codes::encode(uint symb, uint * stream, ulong pos) {
+ return encodeHuff(huff_table, symb, stream, pos);
+}
+
+ulong huffman_codes::decode(uint * symb, uint * stream, ulong pos) {
+ return decodeHuff(huff_table, symb, stream, pos);
+}
+
+uint huffman_codes::save(FILE *fp) {
+ saveHuff(huff_table,fp);
+ return 0;
+}
+
+huffman_codes * huffman_codes::load(FILE * fp) {
+ huffman_codes * ret = new huffman_codes();
+ ret->huff_table = loadHuff(fp,1);
+ return ret;
+}
+
+
--- /dev/null
+
+#ifndef HUFFMAN_CODES_H
+#define HUFFMAN_CODES_H
+
+#include <basics.h>
+#include <huff.h>
+
+class huffman_codes {
+
+ public:
+ huffman_codes(uint * seq, uint n);
+ ~huffman_codes();
+
+ ulong encode(uint symb, uint * stream, ulong pos);
+ ulong decode(uint * symb, uint * stream, ulong pos);
+ uint max_length();
+ uint size();
+ uint save(FILE *fp);
+ static huffman_codes * load(FILE *fp);
+
+ protected:
+ huffman_codes();
+ THuff huff_table;
+};
+
+#endif
--- /dev/null
+/* static_bitsequence.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_bitsequence definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "static_bitsequence.h"
+
+uint static_bitsequence::rank0(uint i) {
+ return i+1-rank1(i);
+}
+
+uint static_bitsequence::rank1(uint i) {
+ if(i>=len) return ones;
+ if(ones==0) return -1;
+ uint ini = 1;
+ uint fin = ones;
+ while(ini<fin) {
+ uint pos = (ini+fin)/2;
+ uint bp = select1(pos);
+ if(bp==i) return pos;
+ if(bp<i)
+ ini = pos+1;
+ else
+ fin = pos-1;
+ }
+ if(select1(ini)>i) return ini-1;
+ return ini;
+}
+
+uint static_bitsequence::select0(uint i) {
+ if(i>len-ones) return len;
+ if(i==0) return -1;
+ uint ini = 0;
+ uint fin = len-1;
+ while(ini<fin) {
+ uint pos = (ini+fin)/2;
+ uint br = rank0(pos);
+ if(br<i)
+ ini = pos+1;
+ else
+ fin = pos;
+ }
+ return ini;
+}
+
+uint static_bitsequence::select1(uint i) {
+ if(i>ones) return len;
+ if(i==0) return 0;
+ uint ini = 0;
+ uint fin = len-1;
+ while(ini<fin) {
+ uint pos = (ini+fin)/2;
+ uint br = rank1(pos);
+ if(br<i)
+ ini = pos+1;
+ else
+ fin = pos;
+ }
+ return ini;
+}
+
+bool static_bitsequence::access(uint i) {
+ return (rank1(i)-(i!=0?rank1(i-1):0))>0;
+}
+
+uint static_bitsequence::length() {
+ return len;
+}
+
+uint static_bitsequence::count_one() {
+ return ones;
+}
+
+uint static_bitsequence::count_zero() {
+ return len-ones;
+}
+
+static_bitsequence * static_bitsequence::load(FILE * fp) {
+ uint r;
+ if(fread(&r,sizeof(uint),1,fp)!=1) return NULL;
+ fseek(fp,-1*sizeof(uint),SEEK_CUR);
+ switch(r) {
+ case RRR02_HDR: return static_bitsequence_rrr02::load(fp);
+ case BRW32_HDR: return static_bitsequence_brw32::load(fp);
+ }
+ return NULL;
+}
+
--- /dev/null
+/* static_bitsequence.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_bitsequence definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef _STATIC_BITSEQUENCE_H
+#define _STATIC_BITSEQUENCE_H
+
+#define RRR02_HDR 2
+#define BRW32_HDR 3
+
+#include <basics.h>
+#include <iostream>
+
+
+using namespace std;
+
+/** Base class for static bitsequences, contains many abstract functions, so this can't
+ * be instantiated. It includes base implementations for rank0, select0 and select1 based
+ * on rank0.
+ *
+ * @author Francisco Claude
+ */
+class static_bitsequence {
+
+public:
+ virtual ~static_bitsequence() {};
+
+ /** Returns the number of zeros until position i */
+ virtual uint rank0(uint i);
+
+ /** Returns the position of the i-th zero
+ * @return (uint)-1 if i=0, len if i>num_zeros or the position */
+ virtual uint select0(uint i);
+
+ /** Returns the number of ones until position i */
+ virtual uint rank1(uint i);
+
+ /** Returns the position of the i-th one
+ * @return (uint)-1 if i=0, len if i>num_ones or the position */
+ virtual uint select1(uint i);
+
+ /** Returns the i-th bit */
+ virtual bool access(uint i);
+
+ /** Returns the length in bits of the bitmap */
+ virtual uint length();
+
+ /** Returns how many ones are in the bitstring */
+ virtual uint count_one();
+
+ /** Returns how many zeros are in the bitstring */
+ virtual uint count_zero();
+
+ /** Returns the size of the structure in bytes */
+ virtual uint size()=0;
+
+ /** Stores the bitmap given a file pointer, return 0 in case of success */
+ virtual int save(FILE * fp)=0;
+
+ /** Reads a bitmap determining the type */
+ static static_bitsequence * load(FILE * fp);
+
+protected:
+ /** Length of the bitstring */
+ uint len;
+ /** Number of ones in the bitstring */
+ uint ones;
+
+};
+
+#include "static_bitsequence_rrr02.h"
+#include "static_bitsequence_naive.h"
+#include "static_bitsequence_brw32.h"
+
+#endif /* _STATIC_BITSEQUENCE_H */
+
--- /dev/null
+/* static_bitsequence_brw32.cpp
+ Copyright (C) 2005, Rodrigo Gonzalez, all rights reserved.
+
+ New RANK, SELECT, SELECT-NEXT and SPARSE RANK implementations.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+#include "static_bitsequence_brw32.h"
+#include <cassert>
+#include <cmath>
+// #include <sys/types.h>
+
+
+/////////////
+//Rank(B,i)//
+/////////////
+//_factor = 0 => s=W*lgn
+//_factor = P => s=W*P
+//Is interesting to notice
+//factor=2 => overhead 50%
+//factor=3 => overhead 33%
+//factor=4 => overhead 25%
+//factor=20=> overhead 5%
+
+static_bitsequence_brw32::static_bitsequence_brw32(){
+ data=NULL;
+ this->owner = true;
+ this->n=0;
+ this->factor=0;
+}
+
+static_bitsequence_brw32::static_bitsequence_brw32( uint *bitarray, uint _n, uint _factor){
+ /*cout << "*****" << endl;
+ cout << bitarray << endl;
+ cout << _n << endl;
+ cout << _factor << endl; */
+ if(_factor==0) exit(-1);
+ data=new uint[_n/W+1];
+ for(uint i=0;i<uint_len(_n,1);i++)
+ data[i] = bitarray[i];
+ for(uint i=uint_len(_n,1);i<_n/W+1;i++)
+ data[i] = 0;
+ this->owner = true;
+ this->n=_n;
+ uint lgn=bits(n-1);
+ this->factor=_factor;
+ if (_factor==0) this->factor=lgn;
+ else this->factor=_factor;
+ b=32;
+ s=b*this->factor;
+ integers = n/W+1;
+ BuildRank();
+ this->len = n;
+ this->ones = rank1(n-1);
+}
+
+static_bitsequence_brw32::~static_bitsequence_brw32() {
+ delete [] Rs;
+ if (owner) delete [] data;
+}
+
+//Metodo que realiza la busqueda d
+void static_bitsequence_brw32::BuildRank(){
+ uint num_sblock = n/s;
+ Rs = new uint[num_sblock+5];// +1 pues sumo la pos cero
+ for(uint i=0;i<num_sblock+5;i++)
+ Rs[i]=0;
+ uint j;
+ Rs[0]=0;
+ for (j=1;j<=num_sblock;j++) {
+ Rs[j]=Rs[j-1];
+ Rs[j]+=BuildRankSub((j-1)*factor,factor);
+ }
+}
+
+uint static_bitsequence_brw32::BuildRankSub(uint ini,uint bloques){
+ uint rank=0,aux;
+ for(uint i=ini;i<ini+bloques;i++) {
+ if (i < integers) {
+ aux=data[i];
+ rank+=popcount(aux);
+ }
+ }
+ return rank; //retorna el numero de 1's del intervalo
+
+}
+
+uint static_bitsequence_brw32::rank1(uint i) {
+ ++i;
+ uint resp=Rs[i/s];
+ uint aux=(i/s)*factor;
+ for (uint a=aux;a<i/W;a++)
+ resp+=popcount(data[a]);
+ resp+=popcount(data[i/W] & ((1<<(i & mask31))-1));
+ return resp;
+}
+
+bool static_bitsequence_brw32::access(uint i) {
+ return (1u << (i % W)) & data[i/W];
+}
+
+int static_bitsequence_brw32::save(FILE *f) {
+ uint wr = BRW32_HDR;
+ if (f == NULL) return 20;
+ if( fwrite(&wr,sizeof(uint),1,f) != 1 ) return -1;
+ if (fwrite (&n,sizeof(uint),1,f) != 1) return 21;
+ if (fwrite (&factor,sizeof(uint),1,f) != 1) return 21;
+ if (fwrite (data,sizeof(uint),n/W+1,f) != n/W+1) return 21;
+ if (fwrite (Rs,sizeof(uint),n/s+1,f) != n/s+1) return 21;
+ return 0;
+}
+
+static_bitsequence_brw32 * static_bitsequence_brw32::load(FILE *f) {
+ if (f == NULL) return NULL;
+ uint type;
+ if(fread(&type,sizeof(uint),1,f)!=1) return NULL;
+ if(type!=BRW32_HDR) { cout << "type:"<<type<<endl; return NULL; }
+ static_bitsequence_brw32 * ret = new static_bitsequence_brw32();
+ if (fread (&ret->n,sizeof(uint),1,f) != 1) return NULL;
+ ret->b=32; // b is a word
+ if (fread (&ret->factor,sizeof(uint),1,f) != 1) return NULL;
+ ret->s=ret->b*ret->factor;
+ uint aux=(ret->n+1)%W;
+ if (aux != 0)
+ ret->integers = (ret->n+1)/W+1;
+ else
+ ret->integers = (ret->n+1)/W;
+ ret->data = new uint[ret->n/W+1];
+ if (!ret->data) return NULL;
+ if (fread (ret->data,sizeof(uint),ret->n/W+1,f) != ret->n/W+1) return NULL;
+ ret->owner = true;
+ ret->Rs= new uint[ret->n/ret->s+1];
+ if (!ret->Rs) return NULL;
+ if (fread (ret->Rs,sizeof(uint),ret->n/ret->s+1,f) != ret->n/ret->s+1) return NULL;
+ ret->len = ret->n;
+ ret->ones = ret->rank1(ret->n-1);
+ return ret;
+}
+
+uint static_bitsequence_brw32::SpaceRequirementInBits() {
+ return (owner?n:0)+(n/s)*sizeof(uint)*8 +sizeof(static_bitsequence_brw32)*8;
+}
+
+uint static_bitsequence_brw32::size() {
+ return SpaceRequirementInBits()/8;
+}
+
+uint static_bitsequence_brw32::SpaceRequirement() {
+ return (owner?n:0)/8+(n/s)*sizeof(uint)+sizeof(static_bitsequence_brw32);
+}
+
+uint static_bitsequence_brw32::prev2(uint start) {
+ // returns the position of the previous 1 bit before and including start.
+ // tuned to 32 bit machine
+
+ uint i = start >> 5;
+ int offset = (start % W);
+ uint answer = start;
+ uint val = data[i] << (Wminusone-offset);
+
+ if (!val) { val = data[--i]; answer -= 1+offset; }
+
+ while (!val) { val = data[--i]; answer -= W; }
+
+ if (!(val & 0xFFFF0000)) { val <<= 16; answer -= 16; }
+ if (!(val & 0xFF000000)) { val <<= 8; answer -= 8; }
+
+ while (!(val & 0x80000000)) { val <<= 1; answer--; }
+ return answer;
+}
+
+uint static_bitsequence_brw32::prev(uint start) {
+ // returns the position of the previous 1 bit before and including start.
+ // tuned to 32 bit machine
+
+ uint i = start >> 5;
+ int offset = (start % W);
+ uint aux2 = data[i] & (-1u >> (31-offset));
+
+ if (aux2 > 0) {
+ if ((aux2&0xFF000000) > 0) return i*W+23+prev_tab[(aux2>>24)&0xFF];
+ else if ((aux2&0xFF0000) > 0) return i*W+15+prev_tab[(aux2>>16)&0xFF];
+ else if ((aux2&0xFF00) > 0) return i*W+7+prev_tab[(aux2>>8)&0xFF];
+ else return i*W+prev_tab[aux2&0xFF]-1;
+ }
+ for (uint k=i-1;;k--) {
+ aux2=data[k];
+ if (aux2 > 0) {
+ if ((aux2&0xFF000000) > 0) return k*W+23+prev_tab[(aux2>>24)&0xFF];
+ else if ((aux2&0xFF0000) > 0) return k*W+15+prev_tab[(aux2>>16)&0xFF];
+ else if ((aux2&0xFF00) > 0) return k*W+7+prev_tab[(aux2>>8)&0xFF];
+ else return k*W+prev_tab[aux2&0xFF]-1;
+ }
+ }
+ return 0;
+}
+
+uint static_bitsequence_brw32::next(uint k) {
+ uint count = k;
+ uint des,aux2;
+ des=count%W;
+ aux2= data[count/W] >> des;
+ if (aux2 > 0) {
+ if ((aux2&0xff) > 0) return count+select_tab[aux2&0xff]-1;
+ else if ((aux2&0xff00) > 0) return count+8+select_tab[(aux2>>8)&0xff]-1;
+ else if ((aux2&0xff0000) > 0) return count+16+select_tab[(aux2>>16)&0xff]-1;
+ else {return count+24+select_tab[(aux2>>24)&0xff]-1;}
+ }
+
+ for (uint i=count/W+1;i<integers;i++) {
+ aux2=data[i];
+ if (aux2 > 0) {
+ if ((aux2&0xff) > 0) return i*W+select_tab[aux2&0xff]-1;
+ else if ((aux2&0xff00) > 0) return i*W+8+select_tab[(aux2>>8)&0xff]-1;
+ else if ((aux2&0xff0000) > 0) return i*W+16+select_tab[(aux2>>16)&0xff]-1;
+ else {return i*W+24+select_tab[(aux2>>24)&0xff]-1;}
+ }
+ }
+ return n;
+}
+
+uint static_bitsequence_brw32::select1(uint x) {
+ // returns i such that x=rank(i) && rank(i-1)<x or n if that i not exist
+ // first binary search over first level rank structure
+ // then sequential search using popcount over a int
+ // then sequential search using popcount over a char
+ // then sequential search bit a bit
+
+ //binary search over first level rank structure
+ uint l=0, r=n/s;
+ uint mid=(l+r)/2;
+ uint rankmid = Rs[mid];
+ while (l<=r) {
+ if (rankmid<x)
+ l = mid+1;
+ else
+ r = mid-1;
+ mid = (l+r)/2;
+ rankmid = Rs[mid];
+ }
+ //sequential search using popcount over a int
+ uint left;
+ left=mid*factor;
+ x-=rankmid;
+ uint j=data[left];
+ uint ones = popcount(j);
+ while (ones < x) {
+ x-=ones;left++;
+ if (left > integers) return n;
+ j = data[left];
+ ones = popcount(j);
+ }
+ //sequential search using popcount over a char
+ left=left*b;
+ rankmid = popcount8(j);
+ if (rankmid < x) {
+ j=j>>8;
+ x-=rankmid;
+ left+=8;
+ rankmid = popcount8(j);
+ if (rankmid < x) {
+ j=j>>8;
+ x-=rankmid;
+ left+=8;
+ rankmid = popcount8(j);
+ if (rankmid < x) {
+ j=j>>8;
+ x-=rankmid;
+ left+=8;
+ }
+ }
+ }
+
+ // then sequential search bit a bit
+ while (x>0) {
+ if (j&1) x--;
+ j=j>>1;
+ left++;
+ }
+ return left-1;
+}
--- /dev/null
+/* static_bitsequence_brw32.h
+ Copyright (C) 2005, Rodrigo Gonzalez, all rights reserved.
+
+ New RANK, SELECT, SELECT-NEXT and SPARSE RANK implementations.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+#ifndef _STATIC_BITSEQUENCE_BRW32_H
+#define _STATIC_BITSEQUENCE_BRW32_H
+
+#include <basics.h>
+#include <static_bitsequence.h>
+/////////////
+//Rank(B,i)//
+/////////////
+//_factor = 0 => s=W*lgn
+//_factor = P => s=W*P
+//Is interesting to notice
+//factor=2 => overhead 50%
+//factor=3 => overhead 33%
+//factor=4 => overhead 25%
+//factor=20=> overhead 5%
+
+/** Implementation of Rodrigo Gonzalez et al. practical rank/select solution [1].
+ * The interface was adapted.
+ *
+ * [1] Rodrigo Gonzalez, Szymon Grabowski, Veli Makinen, and Gonzalo Navarro.
+ * Practical Implementation of Rank and Select Queries. WEA05.
+ *
+ * @author Rodrigo Gonzalez
+ */
+class static_bitsequence_brw32 : public static_bitsequence {
+private:
+ uint *data;
+ bool owner;
+ uint n,integers;
+ uint factor,b,s;
+ uint *Rs; //superblock array
+
+ uint BuildRankSub(uint ini,uint fin); //uso interno para contruir el indice rank
+ void BuildRank(); //crea indice para rank
+ static_bitsequence_brw32();
+
+public:
+ static_bitsequence_brw32(uint *bitarray, uint n, uint factor);
+ ~static_bitsequence_brw32(); //destructor
+ virtual bool access(uint i);
+ virtual uint rank1(uint i); //Nivel 1 bin, nivel 2 sec-pop y nivel 3 sec-bit
+
+ uint prev(uint start); // gives the largest index i<=start such that IsBitSet(i)=true
+ uint prev2(uint start); // gives the largest index i<=start such that IsBitSet(i)=true
+ uint next(uint start); // gives the smallest index i>=start such that IsBitSet(i)=true
+ virtual uint select1(uint x); // gives the position of the x:th 1.
+ uint SpaceRequirementInBits();
+ uint SpaceRequirement();
+ virtual uint size();
+
+ /*load-save functions*/
+ virtual int save(FILE *f);
+ static static_bitsequence_brw32 * load(FILE * fp);
+};
+
+#endif
+
--- /dev/null
+/* static_bitsequence_builder.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_bitsequence_builder definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef _STATIC_BITSEQUENCE_BUILDER_H
+#define _STATIC_BITSEQUENCE_BUILDER_H
+
+class static_bitsequence_builder {
+ public:
+ virtual ~static_bitsequence_builder() {}
+ virtual static_bitsequence * build(uint * bitsequence, uint len)=0;
+};
+
+#include <static_bitsequence_builder_rrr02.h>
+#include <static_bitsequence_builder_brw32.h>
+
+#endif /* _STATIC_BITSEQUENCE_BUILDER_H */
--- /dev/null
+
+#include <static_bitsequence_builder_brw32.h>
+
+static_bitsequence_builder_brw32::static_bitsequence_builder_brw32(uint sampling) {
+ this->sample_rate=sampling;
+}
+
+static_bitsequence * static_bitsequence_builder_brw32::build(uint * bitsequence, uint len) {
+ return new static_bitsequence_brw32(bitsequence,len,this->sample_rate);
+}
--- /dev/null
+/* static_bitsequence_builder_brw32.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_bitsequence_builder definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef _STATIC_BITSEQUENCE_BUILDER_BRW32_H
+#define _STATIC_BITSEQUENCE_BUILDER_BRW32_H
+
+#include <basics.h>
+#include <static_bitsequence.h>
+#include <static_bitsequence_builder.h>
+
+class static_bitsequence_builder_brw32 : public static_bitsequence_builder {
+ public:
+ static_bitsequence_builder_brw32(uint sampling);
+ virtual ~static_bitsequence_builder_brw32() {}
+ virtual static_bitsequence * build(uint * bitsequence, uint len);
+
+ protected:
+ uint sample_rate;
+};
+
+#endif /* _STATIC_BITSEQUENCE_BUILDER_BRW32_H */
--- /dev/null
+
+#include <static_bitsequence_builder_rrr02.h>
+
+static_bitsequence_builder_rrr02::static_bitsequence_builder_rrr02(uint sampling) {
+ sample_rate=sampling;
+}
+
+static_bitsequence * static_bitsequence_builder_rrr02::build(uint * bitsequence, uint len) {
+ return new static_bitsequence_rrr02(bitsequence,len,sample_rate);
+}
--- /dev/null
+/* static_bitsequence_builder_rrr02.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_bitsequence_builder definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef _STATIC_BITSEQUENCE_BUILDER_RRR02_H
+#define _STATIC_BITSEQUENCE_BUILDER_RRR02_H
+
+#include <basics.h>
+#include <static_bitsequence.h>
+#include <static_bitsequence_builder.h>
+
+class static_bitsequence_builder_rrr02 : public static_bitsequence_builder {
+ public:
+ static_bitsequence_builder_rrr02(uint sampling);
+ virtual ~static_bitsequence_builder_rrr02() {}
+ virtual static_bitsequence * build(uint * bitsequence, uint len);
+
+ protected:
+ uint sample_rate;
+};
+
+#endif /* _STATIC_BITSEQUENCE_BUILDER_RRR02_H */
--- /dev/null
+/* static_bitsequence_naive.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * Naive Bitsequence - don't use, only for testing
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "static_bitsequence_naive.h"
+
+static_bitsequence_naive::static_bitsequence_naive(uint * bitseq, uint len) {
+ this->len = len;
+ this->bitseq = new uint[len/W+(len%W>0)];
+ for(uint i=0;i<len/W+(len%W>0);i++)
+ this->bitseq[i] = bitseq[i];
+ uint ret = 0;
+ for(uint k=0;k<len;k++)
+ if(bitget(bitseq,k))
+ ret++;
+ this->ones = ret;
+}
+
+static_bitsequence_naive::~static_bitsequence_naive() {
+ delete [] bitseq;
+}
+
+uint static_bitsequence_naive::rank1(uint i) {
+ if(i>=len) return ones;
+ uint ret = 0;
+ for(uint k=0;k<=i;k++)
+ if(bitget(bitseq,k))
+ ret++;
+ return ret;
+}
+
+uint static_bitsequence_naive::select1(uint i) {
+ if(i==0) return (uint)-1;
+ if(i>ones) return len;
+ uint cnt = 0;
+ for(uint k=0;k<len;k++) {
+ if(bitget(bitseq,k))
+ cnt++;
+ if(cnt==i)
+ return k;
+ }
+ return len;
+}
+
+bool static_bitsequence_naive::access(uint i) {
+ return bitget(bitseq,i)!=0;
+}
+
+uint static_bitsequence_naive::size() {
+ return BW*uint_len(len,1);
+}
+
+int static_bitsequence_naive::save(FILE * fp) { return -1; }
+
--- /dev/null
+/* static_bitsequence_naive.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * Naive Bitsequence - don't use, only for testing
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+
+#ifndef _STATIC_BITSEQUENCE_NAIVE_H
+#define _STATIC_BITSEQUENCE_NAIVE_H
+
+#include <static_bitsequence.h>
+
+/** Class used for testing, should not be used with long bitmaps
+ * @author Francisco Claude
+ */
+class static_bitsequence_naive: public static_bitsequence {
+public:
+ /** Builds a naive bitsequence, receives the bitmap and the length
+ * in bits
+ */
+ static_bitsequence_naive(uint * bitseq, uint len);
+
+ virtual ~static_bitsequence_naive();
+
+ /** Returns the number of ones until position i */
+ virtual uint rank1(uint i);
+
+ /** Returns the position of the i-th one
+ * @return (uint)-1 if i=0, len if i>num_ones or the position */
+ virtual uint select1(uint i);
+
+ /** Returns the i-th bit */
+ virtual bool access(uint i);
+
+ /** Returns the size of the structure in bytes */
+ virtual uint size();
+
+ /** - Not implemented - */
+ virtual int save(FILE * fp);
+
+
+protected:
+ uint * bitseq;
+};
+
+#endif /* _STATIC_BITSEQUENCE_NAIVE_H */
+
--- /dev/null
+/* static_bitsequence_rrr02.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_bitsequence_rrr02 definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "static_bitsequence_rrr02.h"
+
+table_offset * static_bitsequence_rrr02::E = NULL;
+
+static_bitsequence_rrr02::static_bitsequence_rrr02() {
+ ones=0;
+ len=0;
+ if(E==NULL) E = new table_offset(BLOCK_SIZE);
+ E->use();
+ C = NULL;
+ O = NULL;
+ C_sampling = NULL;
+ O_pos = NULL;
+ sample_rate = DEFAULT_SAMPLING;
+ C_len = O_len = C_sampling_len = O_pos_len = 0;
+ O_bits_len = C_sampling_field_bits = O_pos_field_bits = 0;
+}
+
+static_bitsequence_rrr02::static_bitsequence_rrr02(uint * bitseq, uint len, uint sample_rate) {
+ ones = 0;
+ this->len = len;
+ if(E==NULL) E = new table_offset(BLOCK_SIZE);
+ E->use();
+ // Table C
+ C_len = len/BLOCK_SIZE + (len%BLOCK_SIZE!=0);
+ C_field_bits = bits(BLOCK_SIZE);
+ C = new uint[uint_len(C_len,C_field_bits)];
+ for(uint i=0;i<uint_len(C_len,C_field_bits);i++)
+ C[i] = 0;
+ O_bits_len = 0;
+ for(uint i=0;i<C_len;i++) {
+ uint value = popcount(get_var_field(bitseq,i*BLOCK_SIZE,min((uint)len-1,(i+1)*BLOCK_SIZE-1)));
+ assert(value<=BLOCK_SIZE);
+ set_field(C,C_field_bits,i,value);
+ ones += value;
+ O_bits_len += E->get_log2binomial(BLOCK_SIZE,value);
+ }
+ // Table O
+ O_len = uint_len(1,O_bits_len);
+ O = new uint[O_len];
+ for(uint i=0;i<O_len;i++)
+ O[i] = 0;
+ uint O_pos = 0;
+ for(uint i=0;i<C_len;i++) {
+ uint value = (ushort)get_var_field(bitseq,i*BLOCK_SIZE,min((uint)len-1,(i+1)*BLOCK_SIZE-1));
+ set_var_field(O,O_pos,O_pos+E->get_log2binomial(BLOCK_SIZE,popcount(value))-1,E->compute_offset((ushort)value));
+ O_pos += E->get_log2binomial(BLOCK_SIZE,popcount(value));
+ }
+ C_sampling = NULL;
+ this->O_pos = NULL;
+
+ create_sampling(sample_rate);
+}
+
+void static_bitsequence_rrr02::create_sampling(uint sample_rate) {
+ this->sample_rate = sample_rate;
+/* for(uint i=0;i<C_len;i++) {
+ O_bits_len += E->get_log2binomial(BLOCK_SIZE,get_field(C,C_field_bits,i));
+ }*/
+ // Sampling for C
+ C_sampling_len = C_len/sample_rate+2;
+ C_sampling_field_bits = bits(ones);
+ if(C_sampling!=NULL) delete [] C_sampling;
+ C_sampling = new uint[max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits))];
+ for(uint i=0;i<max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits));i++)
+ C_sampling[i] = 0;
+ uint sum = 0;
+ for(uint i=0;i<C_len;i++) {
+ if(i%sample_rate==0)
+ set_field(C_sampling,C_sampling_field_bits,i/sample_rate,sum);
+ sum += get_field(C,C_field_bits,i);
+ }
+ for(uint i=(C_len-1)/sample_rate+1;i<C_sampling_len;i++)
+ set_field(C_sampling,C_sampling_field_bits,i,sum);
+ // Sampling for O (table S) (Code separated from previous construction for readability)
+ O_pos_len = C_len/sample_rate+1;
+ O_pos_field_bits = bits(O_bits_len);
+ if(O_pos!=NULL) delete [] O_pos;
+ O_pos = new uint[uint_len(O_pos_len,O_pos_field_bits)];
+ for(uint i=0;i<uint_len(O_pos_len,O_pos_field_bits);i++)
+ O_pos[i] = 0;
+ uint pos = 0;
+ for(uint i=0;i<C_len;i++) {
+ if(i%sample_rate==0)
+ set_field(O_pos,O_pos_field_bits,i/sample_rate,pos);
+ pos += E->get_log2binomial(BLOCK_SIZE,get_field(C,C_field_bits,i));
+ }
+}
+
+bool static_bitsequence_rrr02::access(uint i) {
+ uint nearest_sampled_value = i/BLOCK_SIZE/sample_rate;
+ uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value);
+ uint pos = i/BLOCK_SIZE;
+ assert(pos<=C_len);
+ for(uint k=nearest_sampled_value*sample_rate;k<pos;k++) {
+ uint aux = get_field(C,C_field_bits,k);
+ pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
+ }
+ uint c = get_field(C,C_field_bits,pos);
+ return ((1<<(i%BLOCK_SIZE))&E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE,c)-1)))!=0;
+}
+
+uint static_bitsequence_rrr02::rank0(uint i) {
+ if(i+1==0) return 0;
+ return 1+i-rank1(i);
+}
+
+uint static_bitsequence_rrr02::rank1(uint i) {
+ if(i+1==0) return 0;
+ uint nearest_sampled_value = i/BLOCK_SIZE/sample_rate;
+ uint sum = get_field(C_sampling,C_sampling_field_bits,nearest_sampled_value);
+ uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value);
+ uint pos = i/BLOCK_SIZE;
+ uint k=nearest_sampled_value*sample_rate;
+ if(k%2==1 && k<pos) {
+ uint aux = get_field(C,C_field_bits,k);
+ sum += aux;
+ pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
+ k++;
+ }
+ uchar * a = (uchar *)C;
+ uint mask = 0x0F;
+ a += k/2;
+ while(k<(uint)max(0,(int)pos-1)) {
+ assert(((*a)&mask)==get_field(C,C_field_bits,k));
+ assert((*a)/16==get_field(C,C_field_bits,k+1));
+ sum += ((*a)&mask)+(*a)/16;
+ pos_O += E->get_log2binomial(BLOCK_SIZE,((*a)&mask))+E->get_log2binomial(BLOCK_SIZE,((*a)/16));
+ a++;
+ k+=2;
+ }
+ if(k<pos) {
+ uint aux = get_field(C,C_field_bits,k);
+ sum += aux;
+ pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
+ k++;
+ }
+ uint c = get_field(C,C_field_bits,pos);
+ sum += popcount(((2<<(i%BLOCK_SIZE))-1) & E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE,c)-1)));
+ return sum;
+}
+
+uint static_bitsequence_rrr02::select0(uint i) {
+ if(i==0) return -1;
+ if(i>len-ones) return len;
+ // Search over partial sums
+ uint start=0;
+ uint end=C_sampling_len-1;
+ uint med, acc=0, pos;
+ while(start<end-1) {
+ med = (start+end)/2;
+ acc = med*sample_rate*BLOCK_SIZE-get_field(C_sampling,C_sampling_field_bits,med);
+ if(acc<i) {
+ if(med==start) break;
+ start=med;
+ }
+ else {
+ if(end==0) break;
+ end = med-1;
+ }
+ }
+ acc = get_field(C_sampling,C_sampling_field_bits,start);
+ while(start<C_len-1 && acc+sample_rate*BLOCK_SIZE==get_field(C_sampling,C_sampling_field_bits,start+1)) {
+ start++;
+ acc +=sample_rate*BLOCK_SIZE;
+ }
+ acc = start*sample_rate*BLOCK_SIZE-acc;
+ pos = (start)*sample_rate;
+ uint pos_O = get_field(O_pos,O_pos_field_bits,start);
+ // Sequential search over C
+ uint s = 0;
+ for(;pos<C_len;pos++) {
+ s = get_field(C,C_field_bits,pos);
+ if(acc+BLOCK_SIZE-s>=i) break;
+ pos_O += E->get_log2binomial(BLOCK_SIZE,s);
+ acc += BLOCK_SIZE-s;
+ }
+ pos = (pos)*BLOCK_SIZE;
+ // Search inside the block
+
+ while(acc<i) {
+ uint new_posO = pos_O+E->get_log2binomial(BLOCK_SIZE,s);
+ uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1));
+ pos_O = new_posO;
+ new_posO = 0;
+ while(acc<i && new_posO<BLOCK_SIZE) {
+ pos++;new_posO++;
+ acc += (((block&1)==0)?1:0);
+ block = block/2;
+ }
+ }
+ pos--;
+ assert(acc==i);
+ assert(rank0(pos)==i);
+ assert(!access(pos));
+ return pos;
+}
+
+uint static_bitsequence_rrr02::select1(uint i) {
+ if(i==0) return -1;
+ if(i>ones) return len;
+ // Search over partial sums
+ uint start=0;
+ uint end=C_sampling_len-1;
+ uint med, acc=0, pos;
+ while(start<end-1) {
+ med = (start+end)/2;
+ acc = get_field(C_sampling,C_sampling_field_bits,med);
+ if(acc<i) {
+ if(med==start) break;
+ start=med;
+ }
+ else {
+ if(end==0) break;
+ end = med-1;
+ }
+ }
+ acc = get_field(C_sampling,C_sampling_field_bits,start);
+ while(start<C_len-1 && acc==get_field(C_sampling,C_sampling_field_bits,start+1)) start++;
+ pos = (start)*sample_rate;
+ uint pos_O = get_field(O_pos,O_pos_field_bits,start);
+ acc = get_field(C_sampling,C_sampling_field_bits,start);
+ // Sequential search over C
+ uint s = 0;
+ for(;pos<C_len;pos++) {
+ s = get_field(C,C_field_bits,pos);
+ if(acc+s>=i) break;
+ pos_O += E->get_log2binomial(BLOCK_SIZE,s);
+ acc += s;
+ }
+ pos = (pos)*BLOCK_SIZE;
+ //cout << "pos=" << pos << endl;
+ // Search inside the block
+ while(acc<i) {
+ uint new_posO = pos_O+E->get_log2binomial(BLOCK_SIZE,s);
+ uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1));
+ pos_O = new_posO;
+ new_posO = 0;
+ while(acc<i && new_posO<BLOCK_SIZE) {
+ pos++;new_posO++;
+ acc += (((block&1)!=0)?1:0);
+ block = block/2;
+ }
+ //cout << "i=" << i << " len=" << len << " ones=" << ones << " pos=" << pos << " acc=" << acc << " rank=" << rank1(pos) << endl;
+ }
+ pos--;
+ assert(acc==i);
+ assert(rank1(pos)==i);
+ assert(access(pos));
+ return pos;
+}
+
+uint static_bitsequence_rrr02::size() {
+ /*cout << "RRR02 SIZE: " << endl;
+ cout << "Default: " << 9*sizeof(uint)+sizeof(uint*)*4 << endl;
+ cout << "Cs: " << uint_len(C_len,C_field_bits)*sizeof(uint) << endl;
+ cout << "Os: " << O_len*sizeof(uint) << endl;
+ cout << "CSamp: " << uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint) << endl;
+ cout << "OSamp: " << uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint) << endl;
+ cout << "E: " << E->size() << endl;*/
+ uint sum = sizeof(static_bitsequence_rrr02);
+ sum += uint_len(C_len,C_field_bits)*sizeof(uint);
+ sum += O_len*sizeof(uint);
+ sum += uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint);
+ sum += uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint);
+ //sum += E->size();
+ return sum;
+}
+
+static_bitsequence_rrr02::~static_bitsequence_rrr02() {
+ if(C!=NULL) delete [] C;
+ if(O!=NULL) delete [] O;
+ if(C_sampling!=NULL) delete [] C_sampling;
+ if(O_pos!=NULL) delete [] O_pos;
+ E = E->unuse();
+}
+
+int static_bitsequence_rrr02::save(FILE * fp) {
+ uint wr = RRR02_HDR;
+ wr = fwrite(&wr,sizeof(uint),1,fp);
+ wr += fwrite(&len,sizeof(uint),1,fp);
+ wr += fwrite(&ones,sizeof(uint),1,fp);
+ wr += fwrite(&C_len,sizeof(uint),1,fp);
+ wr += fwrite(&C_field_bits,sizeof(uint),1,fp);
+ wr += fwrite(&O_len,sizeof(uint),1,fp);
+ wr += fwrite(&O_bits_len,sizeof(uint),1,fp);
+ wr += fwrite(&sample_rate,sizeof(uint),1,fp);
+ if(wr!=8) return -1;
+ wr = fwrite(C,sizeof(uint),uint_len(C_len,C_field_bits),fp);
+ if(wr!=uint_len(C_len,C_field_bits)) return -1;
+ wr = fwrite(O,sizeof(uint),O_len,fp);
+ if(wr!=O_len) return -1;
+ return 0;
+}
+
+static_bitsequence_rrr02 * static_bitsequence_rrr02::load(FILE * fp) {
+ static_bitsequence_rrr02 * ret = new static_bitsequence_rrr02();
+ uint rd = 0, type;
+ rd += fread(&type,sizeof(uint),1,fp);
+ rd += fread(&ret->len,sizeof(uint),1,fp);
+ rd += fread(&ret->ones,sizeof(uint),1,fp);
+ rd += fread(&ret->C_len,sizeof(uint),1,fp);
+ rd += fread(&ret->C_field_bits,sizeof(uint),1,fp);
+ rd += fread(&ret->O_len,sizeof(uint),1,fp);
+ rd += fread(&ret->O_bits_len,sizeof(uint),1,fp);
+ rd += fread(&ret->sample_rate,sizeof(uint),1,fp);
+ if(rd!=8 || type!=RRR02_HDR) {
+ delete ret;
+ return NULL;
+ }
+ ret->C = new uint[uint_len(ret->C_len,ret->C_field_bits)];
+ rd = fread(ret->C,sizeof(uint),uint_len(ret->C_len,ret->C_field_bits),fp);
+ if(rd!=uint_len(ret->C_len,ret->C_field_bits)) {
+ ret->C=NULL;
+ delete ret;
+ return NULL;
+ }
+ ret->O = new uint[ret->O_len];
+ rd = fread(ret->O,sizeof(uint),ret->O_len,fp);
+ if(rd!=ret->O_len) {
+ ret->O=NULL;
+ delete ret;
+ return NULL;
+ }
+ ret->create_sampling(ret->sample_rate);
+ return ret;
+}
+
--- /dev/null
+/* static_bitsequence_rrr02.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * RRR02 Bitsequence -
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+
+#ifndef _STATIC_BITSEQUENCE_RRR02_H
+#define _STATIC_BITSEQUENCE_RRR02_H
+
+#define BLOCK_SIZE 15
+#define DEFAULT_SAMPLING 32
+
+#include <static_bitsequence.h>
+#include <table_offset.h>
+#include <cassert>
+#include <iostream>
+
+using namespace std;
+
+/** Implementation of Raman, Raman and Rao's [1] proposal for rank/select capable
+ * data structures, it achieves space nH_0, O(sample_rate) time for rank and O(log len)
+ * for select. The practial implementation is based on [2]
+ *
+ * [1] R. Raman, V. Raman and S. Rao. Succinct indexable dictionaries with applications
+ * to encoding $k$-ary trees and multisets. SODA02.
+ * [2] F. Claude and G. Navarro. Practical Rank/Select over Arbitrary Sequences. SPIRE08.
+ *
+ * @author Francisco Claude
+ */
+class static_bitsequence_rrr02: public static_bitsequence {
+public:
+ static_bitsequence_rrr02(uint * bitseq, uint len, uint sample_rate=DEFAULT_SAMPLING);
+ virtual ~static_bitsequence_rrr02();
+
+ /** Returns the number of zeros until position i */
+ virtual uint rank0(uint i);
+
+ /** Returns the number of ones until position i */
+ virtual uint rank1(uint i);
+
+ /** Returns the position of the i-th zero
+ * @return (uint)-1 if i=0, len if i>num_zeros or the position */
+ virtual uint select0(uint i);
+
+ /** Returns the position of the i-th one
+ * @return (uint)-1 if i=0, len if i>num_ones or the position */
+ virtual uint select1(uint i);
+
+ /** Returns the i-th bit */
+ virtual bool access(uint i);
+
+ /** Returns the size of the structure in bytes */
+ virtual uint size();
+
+ /** Stores the bitmap given a file pointer, return 0 in case of success */
+ virtual int save(FILE * fp);
+
+ /** Reads the bitmap from a file pointer, returns NULL in case of error */
+ static static_bitsequence_rrr02 * load(FILE * fp);
+
+ /** Creates a new sampling for the queries */
+ void create_sampling(uint sampling_rate);
+
+ /** Frees the space required by the table E, which is static and global
+ * to all instances.
+ */
+ static void delete_E() {
+ delete E;
+ }
+
+
+protected:
+ static_bitsequence_rrr02();
+ /** Classes and offsets */
+ uint *C, *O;
+ /** Length of C and O (in uints) */
+ uint C_len, O_len;
+ /** Bits required per field for C and in total for O */
+ uint C_field_bits, O_bits_len;
+ /** C and O samplings */
+ uint *C_sampling, *O_pos;
+ /** Length of the samplings */
+ uint C_sampling_len,O_pos_len;
+ /** Lenght in bits per field */
+ uint C_sampling_field_bits,O_pos_field_bits;
+ /** Sample rate */
+ uint sample_rate;
+
+ static table_offset * E;
+};
+
+#endif /* _STATIC_BITSEQUENCE_RRR02_H */
+
+
--- /dev/null
+/* table_offset.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * Table for offsets.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+
+#include "table_offset.h"
+
+
+// Interface for old implementation
+void genera(ushort * bch, uint u, ushort * F, uint lF);
+uint generaClase(ushort * bch, uint u, uint clase, uint puestos, uint pos_ini, uint generado);
+uint offset_func(uint u, uint busca);
+uint offsetRecursivo(uint u, uint busca, uint clase, uint puestos, uint pos_ini, uint generado);
+uint __indiceFunc;
+uint __indAcumulado;
+ushort * __Lis;
+// End interface old implementation
+
+
+table_offset::table_offset(uint u) {
+ this->u = u;
+ users_count = 0;
+ short_bitmaps = new ushort[((1<<u)+1)];
+ offset_class = new ushort[u+2];
+ binomial = new uint*[u+1];
+ log2binomial = new ushort*[u+1];
+ for(uint i=0;i<u+1;i++) {
+ binomial[i] = new uint[u+1];
+ log2binomial[i] = new ushort[u+1];
+ for(uint j=0;j<u+1;j++) {
+ binomial[i][j] = 0;
+ log2binomial[i][j] = 0;
+ }
+ }
+ for(uint i=0;i<u+1;i++) {
+ binomial[i][0] = 1;
+ binomial[i][1] = 1;
+ binomial[i][i] = 1;
+ log2binomial[i][0] = 0;
+ log2binomial[i][1] = 0;
+ log2binomial[i][i] = 0;
+ }
+ for(uint j=1;j<u+1;j++) {
+ for(uint i=j+1;i<u+1;i++) {
+ binomial[i][j] = binomial[i-1][j-1]+binomial[i-1][j];
+ log2binomial[i][j] = bits(binomial[i][j]-1);
+ }
+ }
+ fill_tables();
+}
+
+void table_offset::fill_tables() {
+ genera(short_bitmaps, u, offset_class, u);
+ rev_offset = __Lis;
+ //delete [] __Lis;
+}
+
+table_offset::~table_offset() {
+ delete [] short_bitmaps;
+ delete [] offset_class;
+ for(uint i=0;i<u+1;i++) {
+ delete [] binomial[i];
+ delete [] log2binomial[i];
+ }
+ delete [] binomial;
+ delete [] log2binomial;
+ delete [] rev_offset;
+}
+
+uint table_offset::size() {
+ uint ret = sizeof(ushort)*(((1<<u)+1)+u+1);
+ ret += (sizeof(uint)+sizeof(ushort))*((u+1)*(u+1));
+ ret += sizeof(ushort)*(2<<(u+1));
+ ret += sizeof(ushort)*(u+2);
+ return ret;
+}
+
+// OLD implementation, replace
+
+void genera(ushort * bch, uint u, ushort * F, uint lF) {
+ __indAcumulado=0;
+ __indiceFunc=0;
+ F[0]=0;
+ __Lis = new ushort[(2<<(u+1))]; //(uint *)malloc((2<<u+1)*sizeof(uint));
+ for (uint i=0;i<=u;i++) {
+ __indAcumulado += generaClase(bch, u, i, 0, 0, 0);
+ F[i+1] = __indiceFunc;
+ }
+}
+
+uint generaClase(ushort * bch, uint u, uint clase, uint puestos, uint pos_ini, uint generado) {
+ uint ret=0;
+ if (clase==puestos) {
+ bch[__indiceFunc] = generado;
+ __Lis[generado] = __indiceFunc-__indAcumulado;
+ __indiceFunc++;
+ return 1;
+ }
+ if (clase<puestos)
+ return 0;
+ for (uint i=pos_ini;i<u;i++) {
+ uint tmp = generado | (1<<i);
+ ret += generaClase(bch, u, clase, puestos+1, i+1, tmp);
+ }
+ return ret;
+}
+
+
--- /dev/null
+/* table_offset.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * Table for offsets definition.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef _TABLE_OFFSET_H
+#define _TABLE_OFFSET_H
+
+#include <basics.h>
+#include <iostream>
+
+using namespace std;
+
+/** Universal table required for static_bitsequence_rrr02, Raman, Raman and Rao's [1]
+ * proposal for rank/select capable data structures, it achieves space nH_0,
+ * O(sample_rate) time for rank and O(log len) for select. The practial implementation
+ * is based on [2]
+ *
+ * [1] R. Raman, V. Raman and S. Rao. Succinct indexable dictionaries with applications
+ * to encoding $k$-ary trees and multisets. SODA02.
+ * [2] F. Claude and G. Navarro. Practical Rank/Select over Arbitrary Sequences. SPIRE08.
+ *
+ * @author Francisco Claude
+ */
+class table_offset {
+
+public:
+ table_offset(uint u);
+ ~table_offset();
+
+ /** Increments the counter of users for the table */
+ inline void use() {
+ users_count++;
+ }
+
+ /** Tells the object that the user is not going to need the table anymore. */
+ inline table_offset * unuse() {
+ users_count--;
+ if(!users_count) {
+ delete this;
+ return NULL;
+ }
+ return this;
+ }
+
+ /** Computes binomial(n,k) for n,k<=u */
+ inline uint get_binomial(uint n, uint k) {
+ return binomial[n][k];
+ }
+
+ /** Computes ceil(log2(binomial(n,k))) for n,k<=u */
+ inline ushort get_log2binomial(uint n, uint k) {
+ return log2binomial[n][k];
+ }
+
+ /** Returns the bitmap represented by the given class and inclass offsets */
+ inline ushort short_bitmap(uint class_offset, uint inclass_offset) {
+ if(class_offset==0) return 0;
+ if(class_offset==u) return (ushort)(((uint)1<<u)-1);
+ return short_bitmaps[offset_class[class_offset]+inclass_offset];
+ }
+
+ /** Returns u */
+ inline uint get_u() {
+ return u;
+ }
+
+ /** Computes the offset of the first u bits of a given bitstring */
+ inline ushort compute_offset(ushort v) {
+ return rev_offset[v];
+ }
+
+ /** Returns the size of the bitmap in bytes */
+ uint size();
+
+protected:
+ int users_count;
+ uint u;
+ uint ** binomial;
+ ushort * rev_offset;
+ ushort ** log2binomial;
+ ushort * offset_class;
+ ushort * short_bitmaps;
+
+ void fill_tables();
+};
+
+#endif
+
--- /dev/null
+
+#include <static_sequence.h>
+
+static_sequence::static_sequence() {}
+static_sequence::~static_sequence() {}
+uint static_sequence::length() { return len; }
+
+static_sequence * static_sequence::load(FILE * fp) {
+ uint rd;
+ if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
+ fseek(fp,-sizeof(uint),SEEK_CUR);
+ switch(rd) {
+ case WVTREE_HDR: return static_sequence_wvtree::load(fp);
+ }
+ return NULL;
+}
--- /dev/null
+/* static_sequence.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_sequence definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef _STATIC_SEQUENCE_H
+#define _STATIC_SEQUENCE_H
+
+
+#include <basics.h>
+#include <iostream>
+
+#define WVTREE_HDR 2
+
+using namespace std;
+
+/** Base class for static sequences, contains many abstract functions, so this can't
+ * be instantiated.
+ *
+ * @author Francisco Claude
+ */
+class static_sequence {
+
+public:
+ static_sequence();
+ virtual ~static_sequence();
+
+ /** Returns the number of occurrences of c until position i */
+ virtual uint rank(uint c, uint i)=0;
+
+ /** Returns the position of the i-th c
+ * @return (uint)-1 if i=0, len if i exceeds the number of cs */
+ virtual uint select(uint c, uint i)=0;
+
+ /** Returns the i-th element */
+ virtual uint access(uint i)=0;
+
+ /** Returns the length of the sequence */
+ virtual uint length();
+
+ /** Returns how many cs are in the sequence */
+ virtual uint count(uint c)=0;
+
+ /** Returns the size of the structure in bytes */
+ virtual uint size()=0;
+
+ /** Stores the bitmap given a file pointer, return 0 in case of success */
+ virtual uint save(FILE * fp)=0;
+
+ /** Reads a bitmap determining the type */
+ static static_sequence * load(FILE * fp);
+
+protected:
+ /** Length of the bitstring */
+ uint len;
+
+};
+
+#include <static_sequence_wvtree.h>
+
+#endif /* _STATIC_SEQUENCE_H */
--- /dev/null
+
+#include <static_sequence_wvtree.h>
+
+static_sequence_wvtree::static_sequence_wvtree(uint * symbols, uint n, wt_coder * c, static_bitsequence_builder * bmb, alphabet_mapper * am) {
+ for(uint i=0;i<n;i++)
+ symbols[i] = am->map(symbols[i]);
+ this->am = am;
+ this->c=c;
+ cout << "Building..."; cout.flush();
+ root = new wt_node_internal(symbols, n, 0, c, bmb);
+ cout << "done" << endl; cout.flush();
+ for(uint i=0;i<n;i++)
+ symbols[i] = am->unmap(symbols[i]);
+}
+
+static_sequence_wvtree::static_sequence_wvtree() {}
+
+
+static_sequence_wvtree::~static_sequence_wvtree() {
+ delete root;
+ delete am;
+ delete c;
+}
+
+uint static_sequence_wvtree::rank(uint symbol, uint pos) {
+ return root->rank(am->map(symbol), pos, 0, c);
+}
+
+uint static_sequence_wvtree::count(uint s) {
+ return root->rank(am->map(s), len-1, 0, c);
+}
+
+uint static_sequence_wvtree::select(uint symbol, uint pos) {
+ return root->select(am->map(symbol), pos, 0, c)-1;
+}
+
+uint static_sequence_wvtree::access(uint pos) {
+ return am->unmap(root->access(pos));
+}
+
+uint static_sequence_wvtree::size() {
+ /*cout << "WT: " << root->size() << endl;
+ cout << "Coder: " << c->size() << endl;
+ cout << "AM: " << am->size() << endl;*/
+ return sizeof(static_sequence_wvtree)+sizeof(uint)+root->size()+am->size()+c->size();
+}
+
+uint static_sequence_wvtree::save(FILE * fp) {
+ uint wr = WVTREE_HDR;
+ wr = fwrite(&wr,sizeof(uint),1,fp);
+ if(wr!=1) return 1;
+ wr = fwrite(&n,sizeof(uint),1,fp);
+ if(wr!=1) return 1;
+ if(c->save(fp)) return 1;
+ if(am->save(fp)) return 1;
+ if(root->save(fp)) return 1;
+ return 0;
+}
+
+static_sequence_wvtree * static_sequence_wvtree::load(FILE *fp) {
+ uint rd;
+ if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
+ if(rd!=WVTREE_HDR) return NULL;
+ static_sequence_wvtree * ret = new static_sequence_wvtree();
+ if(fread(&ret->n,sizeof(uint),1,fp)!=1) return NULL;
+ ret->c = wt_coder::load(fp);
+ ret->am = alphabet_mapper::load(fp);
+ ret->root = wt_node::load(fp);
+ return ret;
+}
+
--- /dev/null
+
+#ifndef STATIC_SEQUENCE_WVTREE_H
+#define STATIC_SEQUENCE_WVTREE_H
+#include <iostream>
+#include <cassert>
+#include <basics.h>
+#include <static_bitsequence.h>
+#include <static_bitsequence_builder.h>
+#include <wt_node_internal.h>
+#include <wt_coder_binary.h>
+#include <alphabet_mapper.h>
+#include <static_sequence.h>
+
+using namespace std;
+
+class static_sequence_wvtree : public static_sequence {
+ public:
+
+ /** Builds a Wavelet Tree for the string
+ * pointed by symbols assuming its length
+ * equals n and the test flag allows to set
+ * if the structure must be tested for
+ * correctness after being created (this is very expensive). */
+ static_sequence_wvtree(uint * symbols, uint n, wt_coder * coder, static_bitsequence_builder * bmb, alphabet_mapper * am);
+
+ virtual ~static_sequence_wvtree();
+
+ virtual uint rank(uint symbol, uint pos);
+
+ virtual uint select(uint symbol, uint i);
+
+ virtual uint access(uint pos);
+
+ virtual uint count(uint s);
+
+ virtual uint size();
+
+ virtual uint save(FILE * fp);
+ static static_sequence_wvtree * load(FILE *fp);
+ protected:
+
+ static_sequence_wvtree();
+
+ wt_node * root;
+ wt_coder * c;
+ alphabet_mapper * am;
+ //bitmap_builder * bmb;
+
+ /** Length of the string. */
+ uint n;
+
+ /** Height of the Wavelet Tree. */
+ uint max_v;
+
+ /** Flag for testing for correcteness. */
+ bool test;
+
+
+};
+#endif /* _STATIC_SEQUENCE_WVTREE_H */
--- /dev/null
+
+#include <wt_coder.h>
+
+wt_coder * wt_coder::load(FILE *fp) {
+ uint rd;
+ if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
+ fseek(fp,-sizeof(uint),SEEK_CUR);
+ switch(rd) {
+ case WT_CODER_HUFF_HDR: return wt_coder_huff::load(fp);
+ }
+ return NULL;
+}
--- /dev/null
+
+#ifndef wt_coder_h
+#define wt_coder_h
+
+#include <basics.h>
+#include <iostream>
+
+using namespace std;
+
+#define WT_CODER_HUFF_HDR 2
+
+class wt_coder {
+ public:
+ virtual ~wt_coder() {};
+ virtual bool is_set(uint symbol, uint l)=0;
+ virtual bool done(uint symbol, uint l)=0;
+ virtual uint size()=0;
+ virtual uint save(FILE *fp)=0;
+ static wt_coder * load(FILE *fp);
+};
+
+#include <wt_coder_huff.h>
+#include <wt_coder_binary.h>
+
+#endif
+
--- /dev/null
+
+#include <wt_coder_binary.h>
+
+wt_coder_binary::wt_coder_binary(uint * seq, uint n, alphabet_mapper * am) {
+ uint max_v = 0;
+ for(uint i=0;i<n;i++)
+ max_v = max(am->map(seq[i]),max_v);
+ h=bits(max_v);
+}
+
+wt_coder_binary::~wt_coder_binary() {}
+
+bool wt_coder_binary::is_set(uint symbol, uint l) {
+ if((1<<(h-l-1))&symbol) return true;
+ return false;
+}
+
+bool wt_coder_binary::done(uint symbol, uint l) {
+ if(l==h-1) return true;
+ return false;
+}
+
+uint wt_coder_binary::size() {
+ return sizeof(wt_coder_binary);
+}
+
--- /dev/null
+
+#ifndef wt_coder_binary_h
+#define wt_coder_binary_h
+
+#include <basics.h>
+#include <wt_coder.h>
+#include <alphabet_mapper.h>
+
+class wt_coder_binary: public wt_coder {
+ public:
+ wt_coder_binary(uint * seq, uint n, alphabet_mapper * am);
+ virtual ~wt_coder_binary();
+ virtual bool is_set(uint symbol, uint l);
+ virtual bool done(uint symbol, uint l);
+ virtual uint size();
+
+ protected:
+ uint h;
+};
+
+#endif
+
--- /dev/null
+
+#include <wt_coder_huff.h>
+
+wt_coder_huff::wt_coder_huff(uint * symbs, uint n, alphabet_mapper * am) {
+ for(uint i=0;i<n;i++)
+ symbs[i] = am->map(symbs[i]);
+ hc = new huffman_codes(symbs, n);
+ buffer = new uint[hc->max_length()/W+1];
+ s_len = 0; last_symbol = (uint)-1;
+ for(uint i=0;i<n;i++)
+ symbs[i] = am->unmap(symbs[i]);
+}
+
+wt_coder_huff::wt_coder_huff() {}
+
+wt_coder_huff::~wt_coder_huff() {
+ delete hc;
+ delete [] buffer;
+}
+
+bool wt_coder_huff::is_set(uint symbol, uint l) {
+ if(symbol!=last_symbol) {
+ s_len = (uint)hc->encode(symbol, buffer, (ulong)0);
+ last_symbol = symbol;
+ }
+ return bitget(buffer,l);
+}
+
+bool wt_coder_huff::done(uint symbol, uint l) {
+ if(symbol!=last_symbol) {
+ s_len = (uint)hc->encode(symbol, buffer, (ulong)0);
+ last_symbol = symbol;
+ }
+ return l==s_len;
+}
+
+uint wt_coder_huff::size() {
+ return 2*sizeof(uint)+sizeof(wt_coder_huff)+hc->size()+(hc->max_length()/W+1)*sizeof(uint);
+}
+
+uint wt_coder_huff::save(FILE * fp) {
+ uint wr = WT_CODER_HUFF_HDR;
+ wr = fwrite(&wr,sizeof(uint),1,fp);
+ if(wr!=1) return 1;
+ if(hc->save(fp)) return 1;
+ //if(am->save(fp)) return 1;
+ return 0;
+}
+
+wt_coder_huff * wt_coder_huff::load(FILE *fp) {
+ uint rd;
+ if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
+ if(rd!=WT_CODER_HUFF_HDR) return NULL;
+ wt_coder_huff * ret = new wt_coder_huff();
+ ret->hc = huffman_codes::load(fp);
+ ret->buffer = new uint[ret->hc->max_length()/W+1];
+ ret->s_len = 0; ret->last_symbol = (uint)-1;
+ return ret;
+}
--- /dev/null
+
+#ifndef wt_coder_huff_h
+#define wt_coder_huff_h
+
+#include <basics.h>
+#include <wt_coder.h>
+#include <huffman_codes.h>
+#include <alphabet_mapper.h>
+
+class wt_coder_huff: public wt_coder {
+ public:
+ wt_coder_huff(uint *symbs, uint n, alphabet_mapper * am);
+ virtual ~wt_coder_huff();
+ virtual bool is_set(uint symbol, uint l);
+ virtual bool done(uint symbol, uint l);
+ virtual uint size();
+ virtual uint save(FILE *fp);
+ static wt_coder_huff * load(FILE *fp);
+ uint * get_buffer(uint symbol, uint *n);
+
+ protected:
+ wt_coder_huff();
+ huffman_codes * hc;
+ uint * buffer;
+ uint last_symbol, s_len;
+};
+
+#endif
+
--- /dev/null
+/* wt_node.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_node
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include <wt_node.h>
+
+wt_node * wt_node::load(FILE *fp) {
+ uint rd;
+ if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
+ if(rd==WT_NODE_NULL_HDR) return NULL;
+ fseek(fp,-sizeof(uint),SEEK_CUR);
+ switch(rd) {
+ case WT_NODE_INTERNAL_HDR: return wt_node_internal::load(fp);
+ case WT_NODE_LEAF_HDR: return wt_node_leaf::load(fp);
+ }
+ return NULL;
+}
--- /dev/null
+/* wt_node.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_node
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef wt_node_h
+#define wt_node_h
+
+#include <basics.h>
+#include <wt_coder.h>
+
+#define WT_NODE_NULL_HDR 0
+#define WT_NODE_INTERNAL_HDR 2
+#define WT_NODE_LEAF_HDR 3
+
+
+class wt_node {
+ public:
+ virtual ~wt_node() {}
+ virtual uint rank(uint symbol, uint pos, uint l, wt_coder * c)=0;
+ virtual uint select(uint symbol, uint pos, uint l, wt_coder * c)=0;
+ virtual uint access(uint pos)=0;
+ virtual uint size()=0;
+ virtual uint save(FILE *fp)=0;
+ static wt_node * load(FILE *fp);
+};
+
+#include <wt_node_internal.h>
+#include <wt_node_leaf.h>
+
+#endif
+
--- /dev/null
+
+#include <wt_node_internal.h>
+
+
+wt_node_internal::wt_node_internal(uint * symbols, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb) {
+ uint * ibitmap = new uint[n/W+1];
+ for(uint i=0;i<n/W+1;i++)
+ ibitmap[i]=0;
+ for(uint i=0;i<n;i++)
+ if(c->is_set(symbols[i],l))
+ bitset(ibitmap,i);
+ bitmap = bmb->build(ibitmap, n);
+ delete [] ibitmap;
+ uint count_right = bitmap->rank1(n-1);
+ uint count_left = n-count_right+1;
+ uint * left = new uint[count_left+1];
+ uint * right = new uint[count_right+1];
+ count_right = count_left = 0;
+ bool match_left = true, match_right = true;
+ for(uint i=0;i<n;i++) {
+ if(bitmap->access(i)) {
+ right[count_right++]=symbols[i];
+ if(count_right>1)
+ if(right[count_right-1]!=right[count_right-2])
+ match_right = false;
+ }
+ else {
+ left[count_left++]=symbols[i];
+ if(count_left>1)
+ if(left[count_left-1]!=left[count_left-2])
+ match_left = false;
+ }
+ }
+ if(count_left>0) {
+ if(match_left)
+ left_child = new wt_node_leaf(left[0], count_left);
+ else
+ left_child = new wt_node_internal(left, count_left, l+1, c, bmb);
+ } else {
+ left_child = NULL;
+ }
+ if(count_right>0) {
+ if(match_right)
+ right_child = new wt_node_leaf(right[0], count_right);
+ else
+ right_child = new wt_node_internal(right, count_right, l+1, c, bmb);
+ } else {
+ right_child = NULL;
+ }
+ delete [] left;
+ delete [] right;
+}
+
+wt_node_internal::wt_node_internal() { }
+
+wt_node_internal::~wt_node_internal() {
+ delete bitmap;
+ if(right_child!=NULL) delete right_child;
+ if(left_child!=NULL) delete left_child;
+}
+
+uint wt_node_internal::rank(uint symbol, uint pos, uint l, wt_coder * c) {
+ bool is_set = c->is_set(symbol,l);
+ if(!is_set) {
+ if(left_child==NULL) return 0;
+ return left_child->rank(symbol, bitmap->rank0(pos)-1,l+1,c);
+ }
+ else {
+ if(right_child==NULL) return 0;
+ return right_child->rank(symbol, bitmap->rank1(pos)-1,l+1,c);
+ }
+}
+
+uint wt_node_internal::select(uint symbol, uint pos, uint l, wt_coder * c) {
+ bool is_set = c->is_set(symbol, l);
+ if(!is_set) {
+ if(left_child==NULL)
+ return (uint)(-1);
+ uint new_pos = left_child->select(symbol, pos, l+1,c);
+ if(new_pos+1==0) return (uint)(-1);
+ return bitmap->select0(new_pos)+1;
+ } else {
+ if(right_child==NULL)
+ return (uint)(-1);
+ uint new_pos = right_child->select(symbol, pos, l+1,c);
+ if(new_pos+1==0) return (uint)(-1);
+ return bitmap->select1(new_pos)+1;
+ }
+}
+
+uint wt_node_internal::access(uint pos) {
+ bool is_set = bitmap->access(pos);
+ if(!is_set) {
+ assert(left_child!=NULL);
+ return left_child->access(bitmap->rank0(pos)-1);
+ } else {
+ assert(right_child!=NULL);
+ return right_child->access(bitmap->rank1(pos)-1);
+ }
+}
+
+uint wt_node_internal::size() {
+ uint s = bitmap->size()+sizeof(wt_node_internal);
+ if(left_child!=NULL)
+ s += left_child->size();
+ if(right_child!=NULL)
+ s += right_child->size();
+ return s;
+}
+
+uint wt_node_internal::save(FILE *fp) {
+ uint wr = WT_NODE_INTERNAL_HDR;
+ wr = fwrite(&wr,sizeof(uint),1,fp);
+ if(wr!=1) return 1;
+ if(bitmap->save(fp)) return 1;
+ if(left_child!=NULL) {
+ if(left_child->save(fp)) return 1;
+ } else {
+ wr = WT_NODE_NULL_HDR;
+ wr = fwrite(&wr,sizeof(uint),1,fp);
+ if(wr!=1) return 1;
+ }
+ if(right_child!=NULL) {
+ if(right_child->save(fp)) return 1;
+ } else {
+ wr = WT_NODE_NULL_HDR;
+ wr = fwrite(&wr,sizeof(uint),1,fp);
+ if(wr!=1) return 1;
+ }
+ return 0;
+}
+
+wt_node_internal * wt_node_internal::load(FILE *fp) {
+ uint rd;
+ if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
+ if(rd!=WT_NODE_INTERNAL_HDR) return NULL;
+ wt_node_internal * ret = new wt_node_internal();
+ ret->bitmap = static_bitsequence::load(fp);
+ ret->left_child = wt_node::load(fp);
+ ret->right_child = wt_node::load(fp);
+ return ret;
+}
--- /dev/null
+
+#ifndef wt_node_internal_h
+#define wt_node_internal_h
+
+#include <wt_node.h>
+#include <wt_node_leaf.h>
+#include <wt_coder.h>
+#include <basics.h>
+#include <static_bitsequence.h>
+#include <static_bitsequence_builder.h>
+#include <cassert>
+
+class wt_node_internal: public wt_node {
+ public:
+ wt_node_internal(uint * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb);
+ virtual ~wt_node_internal();
+ virtual uint rank(uint symbol, uint pos, uint level, wt_coder * c);
+ virtual uint select(uint symbol, uint pos, uint level, wt_coder * c);
+ virtual uint access(uint pos);
+ virtual uint size();
+ virtual uint save(FILE *fp);
+ static wt_node_internal * load(FILE *fp);
+
+
+ protected:
+ wt_node_internal();
+ wt_node *left_child, *right_child;
+ static_bitsequence * bitmap;
+ //uint length;
+};
+
+#endif
+
--- /dev/null
+
+#include <wt_node_leaf.h>
+
+wt_node_leaf::wt_node_leaf(uint symbol, uint count) {
+ this->symbol = symbol;
+ this->count = count;
+}
+
+wt_node_leaf::wt_node_leaf() {}
+
+wt_node_leaf::~wt_node_leaf() {}
+
+uint wt_node_leaf::rank(uint symbol, uint pos, uint l, wt_coder * c) {
+ assert(symbol==this->symbol);
+ pos++;
+ assert(pos<=count);
+ return pos;
+}
+
+uint wt_node_leaf::select(uint symbol, uint pos, uint l, wt_coder * c) {
+ assert(symbol==this->symbol);
+ assert(pos<=count && pos>0);
+ return pos;
+}
+
+uint wt_node_leaf::access(uint pos) {
+ assert(pos<count);
+ return symbol;
+}
+
+uint wt_node_leaf::size() {
+ return sizeof(wt_node_leaf);
+}
+
+uint wt_node_leaf::save(FILE *fp) {
+ uint wr = WT_NODE_LEAF_HDR;
+ wr = fwrite(&wr,sizeof(uint),1,fp);
+ wr += fwrite(&count,sizeof(uint),1,fp);
+ wr += fwrite(&symbol,sizeof(uint),1,fp);
+ return wr-3;
+}
+
+wt_node_leaf * wt_node_leaf::load(FILE *fp) {
+ uint rd;
+ if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
+ if(rd!=WT_NODE_LEAF_HDR) return NULL;
+ wt_node_leaf * ret = new wt_node_leaf();
+ rd = fread(&(ret->count),sizeof(uint),1,fp);
+ rd += fread(&(ret->symbol),sizeof(uint),1,fp);
+ if(rd!=2) { delete ret; return NULL; }
+ return ret;
+}
--- /dev/null
+
+#ifndef wt_node_leaf_h
+#define wt_node_leaf_h
+
+#include <wt_node.h>
+#include <basics.h>
+#include <wt_coder.h>
+#include <cassert>
+
+class wt_node_leaf: public wt_node {
+ public:
+ wt_node_leaf(uint symbol, uint count);
+ virtual ~wt_node_leaf();
+ virtual uint rank(uint symbol, uint pos, uint l, wt_coder * c);
+ virtual uint select(uint symbol, uint pos, uint l, wt_coder * c);
+ virtual uint access(uint pos);
+ virtual uint size();
+ virtual uint save(FILE *fp);
+ static wt_node_leaf * load(FILE *fp);
+
+ protected:
+ wt_node_leaf();
+ uint symbol;
+ uint count;
+};
+
+#endif
+
--- /dev/null
+/* alphabet_mapper.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_bitsequence definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include <alphabet_mapper.h>
+
+alphabet_mapper * alphabet_mapper::load(FILE *fp) {
+ uint rd;
+ if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
+ fseek(fp,-1*sizeof(uint),SEEK_CUR);
+ switch(rd) {
+ case ALPHABET_MAPPER_NONE_HDR: return alphabet_mapper_none::load(fp);
+ }
+ return NULL;
+}
--- /dev/null
+/* alphabet_mapper.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_bitsequence definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef _ALPHABET_MAPPER_H
+#define _ALPHABET_MAPPER_H
+
+#include <basics.h>
+#include <iostream>
+
+#define ALPHABET_MAPPER_NONE_HDR 2
+
+using namespace std;
+
+/** Base class for alphabet mappers
+ *
+ * @author Francisco Claude
+ */
+class alphabet_mapper {
+ public:
+ virtual ~alphabet_mapper() {}
+ virtual uint map(uint s)=0;
+ virtual uint unmap(uint s)=0;
+ virtual uint size()=0;
+ virtual uint save(FILE *fp)=0;
+ static alphabet_mapper * load(FILE * fp);
+};
+
+#include <alphabet_mapper_none.h>
+
+#endif /* _ALPHABET_MAPPER_H */
--- /dev/null
+
+#include <alphabet_mapper_none.h>
+
+alphabet_mapper_none::alphabet_mapper_none() { }
+
+uint alphabet_mapper_none::map(uint s) {return s;}
+
+uint alphabet_mapper_none::unmap(uint s) {return s;}
+
+uint alphabet_mapper_none::size() { return sizeof(alphabet_mapper_none); }
+
+uint alphabet_mapper_none::save(FILE *fp) {
+ uint wr = ALPHABET_MAPPER_NONE_HDR;
+ wr = fwrite(&wr,sizeof(uint),1,fp);
+ if(wr!=1) return 1;
+ return 0;
+}
+
+alphabet_mapper_none * alphabet_mapper_none::load(FILE * fp) {
+ uint rd;
+ if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
+ if(rd!=ALPHABET_MAPPER_NONE_HDR) return NULL;
+ return new alphabet_mapper_none();
+}
+
--- /dev/null
+/* alphabet_mapper_none.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_bitsequence definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef _ALPHABET_MAPPER_NONE_H
+#define _ALPHABET_MAPPER_NONE_H
+
+#include <basics.h>
+#include <iostream>
+#include <alphabet_mapper.h>
+
+using namespace std;
+
+/** Mapper that doesn't change the value (identity)
+ *
+ * @author Francisco Claude
+ */
+class alphabet_mapper_none : public alphabet_mapper {
+ public:
+ alphabet_mapper_none();
+ virtual ~alphabet_mapper_none() {}
+ virtual uint map(uint s);
+ virtual uint unmap(uint s);
+ virtual uint size();
+ virtual uint save(FILE *fp);
+ static alphabet_mapper_none * load(FILE *fp);
+};
+
+#endif /* _ALPHABET_MAPPER_NONE_H */
--- /dev/null
+CPP=g++
+
+#CPPFLAGS=-g3 -Wall -I../includes/
+CPPFLAGS=-O9 -Wall -DNDEBUG -I../includes/
+
+OBJECTS=test_naive.o test_rrr02.o test_brw32.o make_bitmap.o test_wvtree01.o test_wvtree02.o
+BIN=test_naive test_rrr02 test_brw32 make_bitmap test_wvtree01 test_wvtree02
+LIB=../lib/libcds.a
+
+%.o: %.cpp
+ $(CPP) $(CPPFLAGS) -c $< -o $@
+
+all: $(OBJECTS) $(BIN)
+
+test_naive:
+ $(CPP) $(CPPFLAGS) -o test_naive test_naive.o $(LIB)
+
+test_rrr02:
+ $(CPP) $(CPPFLAGS) -o test_rrr02 test_rrr02.o $(LIB)
+
+test_brw32:
+ $(CPP) $(CPPFLAGS) -o test_brw32 test_brw32.o $(LIB)
+
+make_bitmap:
+ $(CPP) $(CPPFLAGS) -o make_bitmap make_bitmap.o $(LIB)
+
+test_wvtree01:
+ $(CPP) $(CPPFLAGS) -o test_wvtree01 test_wvtree01.o $(LIB)
+
+test_wvtree02:
+ $(CPP) $(CPPFLAGS) -o test_wvtree02 test_wvtree02.o $(LIB)
+
+clean:
+ rm -f $(OBJECTS) $(BIN)
--- /dev/null
+
+#include <iostream>
+#include <cstring>
+#include <cstdlib>
+#include <basics.h>
+#include <cmath>
+
+using namespace std;
+
+int main(int argc, char ** argv) {
+ if(argc!=4) {
+ cout << "usage: " << argv[0] << " <bitmap> <length> <ones>" << endl;
+ return 0;
+ }
+ char * fname = argv[1];
+ uint len = atoi(argv[2]);
+ uint ones = atoi(argv[3]);
+ uint * bm = new uint[uint_len(len,1)];
+ for(uint i=0;i<uint_len(len,1);i++)
+ bm[i] = 0;
+ for(uint i=0;i<ones;i++) {
+ while(true) {
+ uint p = rand()%len;
+ if(!bitget(bm,p)) {
+ bitset(bm,p);
+ break;
+ }
+ }
+ }
+ FILE * fp = fopen(fname,"w");
+ uint l = fwrite(&len,sizeof(uint),1,fp);
+ l += fwrite(bm,sizeof(uint),uint_len(len,1),fp);
+ fclose(fp);
+ delete [] bm;
+ return 0;
+}
+
--- /dev/null
+
+#include <iostream>
+#include <sstream>
+#include <basics.h>
+#include <static_bitsequence.h>
+
+using namespace std;
+
+int main(int argc, char ** argv) {
+ if(argc!=3) {
+ cout << "usage: " << argv[0] << " <bitmap_file> <sample_rate>" << endl;
+ return 0;
+ }
+ FILE * fp = fopen(argv[1],"r");
+ if(fp==NULL) {
+ cout << "Error opening " << argv[1] << endl;
+ return -1;
+ }
+ uint *bitseq, len;
+ uint l = fread(&len, sizeof(uint), 1, fp);
+ bitseq = new uint[uint_len(len,1)];
+ l += fread(bitseq, sizeof(uint), uint_len(len,1), fp);
+ fclose(fp);
+
+ uint sample_rate;
+ stringstream ss(argv[2]);
+ ss >> sample_rate;
+
+ static_bitsequence * bs = new static_bitsequence_brw32(bitseq,len,sample_rate);
+
+ char * fname = new char[string(argv[1]).length()+10];
+ sprintf(fname,"%s.rrr",argv[1]);
+
+ fp = fopen(fname,"w");
+ cout << "Save: " << bs->save(fp) << endl;
+ fclose(fp);
+ delete bs;
+
+ fp = fopen(fname,"r");
+ bs = static_bitsequence::load(fp);
+ cout << bs << endl;
+ fclose(fp);
+ delete [] fname;
+
+ cout << "Bitmap length: " << len << " =? " << bs->length() << endl;
+ cout << "Ones: " << bs->count_one() << endl;
+ cout << "Bitmap size: " << bs->size() << endl;
+ /*for(uint i=0;i<64;i++) {
+ if(i%15==0) cout << " ";
+ cout << (bs->access(i)?"1":"0");
+ }
+ cout << endl;*/
+ uint ones = 0;
+ for(uint i=0;i<len;i++) {
+ //cout << "i="<<i<< endl;
+ if(i%(max(1,bs->length()/100))==0) cout << i/(max(1,bs->length()/100)) << "%" << endl;
+ if(bitget(bitseq,i)) ones++;
+ if(bs->access(i) != (bitget(bitseq,i)!=0)) {
+ cout << "Access error for position " << i << endl;
+ cout << " got: " << bs->access(i) << " expected: " << (bitget(bitseq,i)!=0) << endl;
+ }
+ if(bs->rank1(i) != ones) {
+ cout << "Rank1 error for position " << i << endl;
+ cout << " got: " << bs->rank1(i) << " expected: " << ones << endl;
+ }
+ if(bitget(bitseq,i) && bs->select1(ones) != i) {
+ cout << "Select1 error for position " << i << " ones:" << ones << endl;
+ cout << " got: " << bs->select1(ones) << " expected: " << i << endl;
+ }
+ if(bs->rank0(i) != i+1-ones) {
+ cout << "Rank0 error for position " << i << endl;
+ cout << " got: " << bs->rank0(i) << " expected: " << ones << endl;
+ }
+ if(!bitget(bitseq,i) && bs->select0(i+1-ones) != i) {
+ cout << "Select0 error for position " << i << endl;
+ cout << " got: " << bs->select0(i+1-ones) << " expected: " << i << endl;
+ }
+ }
+ delete bs;
+ delete [] bitseq;
+ cout << "Test completed." << endl;
+ return 0;
+}
+
--- /dev/null
+
+#include <iostream>
+#include <basics.h>
+#include <static_bitsequence.h>
+
+using namespace std;
+
+int main(int argc, char ** argv) {
+ if(argc!=2) {
+ cout << "usage: " << argv[0] << " <bitmap_file>" << endl;
+ return 0;
+ }
+ FILE * fp = fopen(argv[1],"r");
+ if(fp==NULL) {
+ cout << "Error opening " << argv[1] << endl;
+ return -1;
+ }
+ uint *bitseq, len;
+ uint l = fread(&len, sizeof(uint), 1, fp);
+ bitseq = new uint[uint_len(len,1)];
+ l += fread(bitseq, sizeof(uint), uint_len(len,1), fp);
+ static_bitsequence * bs = new static_bitsequence_naive(bitseq,len);
+ cout << "Bitmap length: " << len << " =? " << bs->length() << endl;
+ uint ones = 0;
+ for(uint i=0;i<len;i++) {
+ if(bitget(bitseq,i)) ones++;
+ if(bs->rank1(i) != ones) {
+ cout << "Rank1 error for position " << i << endl;
+ cout << " got: " << bs->rank1(i) << " expected: " << ones << endl;
+ }
+ if(bitget(bitseq,i) && bs->select1(ones) != i) {
+ cout << "Select1 error for position " << i << endl;
+ cout << " got: " << bs->select1(ones) << " expected: " << i << endl;
+ }
+ if(bs->rank0(i) != i+1-ones) {
+ cout << "Rank0 error for position " << i << endl;
+ cout << " got: " << bs->rank0(i) << " expected: " << ones << endl;
+ }
+ if(!bitget(bitseq,i) && bs->select0(i+1-ones) != i) {
+ cout << "Select0 error for position " << i << endl;
+ cout << " got: " << bs->select0(ones) << " expected: " << i << endl;
+ }
+ }
+ delete bs;
+ fclose(fp);
+ cout << "Test completed." << endl;
+ return 0;
+}
+
--- /dev/null
+
+#include <iostream>
+#include <sstream>
+#include <basics.h>
+#include <static_bitsequence.h>
+
+using namespace std;
+
+int main(int argc, char ** argv) {
+ if(argc!=3) {
+ cout << "usage: " << argv[0] << " <bitmap_file> <sample_rate>" << endl;
+ return 0;
+ }
+ FILE * fp = fopen(argv[1],"r");
+ if(fp==NULL) {
+ cout << "Error opening " << argv[1] << endl;
+ return -1;
+ }
+ uint *bitseq, len;
+ uint l=fread(&len, sizeof(uint), 1, fp);
+ bitseq = new uint[uint_len(len,1)];
+ l+=fread(bitseq, sizeof(uint), uint_len(len,1), fp);
+ fclose(fp);
+
+ static_bitsequence * bs = new static_bitsequence_rrr02(bitseq,len);
+
+ char * fname = new char[string(argv[1]).length()+10];
+ sprintf(fname,"%s.rrr",argv[1]);
+
+ fp = fopen(fname,"w");
+ cout << "Save: " << bs->save(fp) << endl;
+ fclose(fp);
+ delete bs;
+
+ fp = fopen(fname,"r");
+ bs = static_bitsequence::load(fp);
+ uint sample_rate;
+ stringstream ss(argv[2]);
+ ss >> sample_rate;
+ ((static_bitsequence_rrr02*)bs)->create_sampling(sample_rate);
+ fclose(fp);
+ delete [] fname;
+
+ cout << "Bitmap length: " << len << " =? " << bs->length() << endl;
+ cout << "Ones: " << bs->count_one() << endl;
+ cout << "Bitmap size: " << bs->size() << endl;
+ /*for(uint i=0;i<64;i++) {
+ if(i%15==0) cout << " ";
+ cout << (bs->access(i)?"1":"0");
+ }
+ cout << endl;*/
+ uint ones = 0;
+ for(uint i=0;i<len;i++) {
+ //cout << "i="<<i<< endl;
+ if(i%max(1,(bs->length()/100))==0) cout << i/max(1,(bs->length()/100)) << "%" << endl;
+ if(bitget(bitseq,i)) ones++;
+ if(bs->access(i) != (bitget(bitseq,i)!=0)) {
+ cout << "Access error for position " << i << endl;
+ cout << " got: " << bs->access(i) << " expected: " << (bitget(bitseq,i)!=0) << endl;
+ }
+ if(bs->rank1(i) != ones) {
+ cout << "Rank1 error for position " << i << endl;
+ cout << " got: " << bs->rank1(i) << " expected: " << ones << endl;
+ }
+ if(bitget(bitseq,i) && bs->select1(ones) != i) {
+ cout << "Select1 error for position " << i << " ones:" << ones << endl;
+ cout << " got: " << bs->select1(ones) << " expected: " << i << endl;
+ }
+ if(bs->rank0(i) != i+1-ones) {
+ cout << "Rank0 error for position " << i << endl;
+ cout << " got: " << bs->rank0(i) << " expected: " << ones << endl;
+ }
+ if(!bitget(bitseq,i) && bs->select0(i+1-ones) != i) {
+ cout << "Select0 error for position " << i << endl;
+ cout << " got: " << bs->select0(i+1-ones) << " expected: " << i << endl;
+ }
+ }
+ delete bs;
+ //static_bitsequence_rrr02::delete_E();
+ delete [] bitseq;
+ cout << "Test completed." << endl;
+ return 0;
+}
+
--- /dev/null
+
+#include <sys/stat.h>
+#include <iostream>
+#include <sstream>
+#include <basics.h>
+#include <static_bitsequence.h>
+#include <alphabet_mapper.h>
+#include <static_sequence.h>
+
+using namespace std;
+
+int main(int argc, char ** argv) {
+ if(argc!=3) {
+ cout << "usage: " << argv[0] << " <file> <samp>" << endl;
+ return 0;
+ }
+ struct stat text_info;
+ if(stat(argv[1],&text_info)<0) {
+ cout << "could not stat: " << argv[1] << endl;
+ return -1;
+ }
+
+ stringstream ss;
+ ss << argv[2];
+ uint samp;
+ ss >> samp;
+
+ uint n= (uint)text_info.st_size/4;
+ uint * text = new uint[n+1];
+ FILE * fp = fopen(argv[1],"r");
+ if(fp==NULL) {
+ cout << "could not open " << argv[1] << endl;
+ return -1;
+ }
+
+ cout << "File: " << argv[1] << endl;
+ cout << "Length: " << n << endl;
+
+ uint max_symbol = 0;
+ for(uint i=0;i<n;i++) {
+ uint c;
+ uint read=fread(&c,sizeof(uint),1,fp);
+ //assert(read==1);
+ text[i] = (uint)c;
+ c += read;
+ max_symbol = max(max_symbol,text[i]);
+ }
+ max_symbol++;
+
+ fclose(fp);
+
+ /*uint *occ = new uint[max_symbol];
+ for(uint i=0;i<max_symbol;i++)
+ occ[i] = 0;
+ for(uint i=0;i<n;i++)
+ occ[text[i]]++;*/
+
+ alphabet_mapper * am = new alphabet_mapper_none();
+ static_bitsequence_builder * bmb = new static_bitsequence_builder_rrr02(samp);
+ cout << "Building Huffman table..."; cout.flush();
+ wt_coder * wtc = new wt_coder_huff(text,n,am);
+ cout << "done" << endl; cout.flush();
+ static_sequence * wt = new static_sequence_wvtree(text,n,wtc,bmb,am,true);
+ delete bmb;
+
+ char * fname = new char[10+string(argv[1]).length()];
+ sprintf(fname,"%s.wt",argv[1]);
+ fp = fopen(fname,"w");
+ wt->save(fp);
+ fclose(fp);
+ delete wt;
+ fp = fopen(fname,"r");
+ wt = static_sequence::load(fp);
+ fclose(fp);
+ delete [] fname;
+
+
+ cout << "WT Size: " << wt->size() << endl;
+ cout << "ft = " << 1.*wt->size()/(bits(max_symbol-1)*n/8) << endl;
+
+ delete [] text;
+ delete wt;
+
+}
--- /dev/null
+
+#include <sys/stat.h>
+#include <iostream>
+#include <sstream>
+#include <basics.h>
+#include <static_bitsequence.h>
+#include <alphabet_mapper.h>
+#include <static_sequence.h>
+
+using namespace std;
+
+int main(int argc, char ** argv) {
+ if(argc!=3) {
+ cout << "usage: " << argv[0] << " <file> <samp>" << endl;
+ return 0;
+ }
+ struct stat text_info;
+ if(stat(argv[1],&text_info)<0) {
+ cout << "could not stat: " << argv[1] << endl;
+ return -1;
+ }
+
+ uint samp;
+ {
+ stringstream ss;
+ ss << string(argv[2]);
+
+ ss >> samp;
+ }
+
+ uint n= (uint)text_info.st_size;
+ uint * text = new uint[n+1];
+ FILE * fp = fopen(argv[1],"r");
+ if(fp==NULL) {
+ cout << "could not open " << argv[1] << endl;
+ return -1;
+ }
+
+ cout << "File: " << argv[1] << endl;
+ cout << "Length: " << n << endl;
+
+ uint max_symbol = 0;
+ for(uint i=0;i<n;i++) {
+ uchar c;
+ uint read=fread(&c,sizeof(uchar),1,fp);
+ //assert(read==1);
+ text[i] = (uint)c;
+ c += read;
+ max_symbol = max(max_symbol,text[i]);
+ }
+ max_symbol++;
+
+ fclose(fp);
+
+ /*uint *occ = new uint[max_symbol];
+ for(uint i=0;i<max_symbol;i++)
+ occ[i] = 0;
+ for(uint i=0;i<n;i++)
+ occ[text[i]]++;*/
+
+ alphabet_mapper * am = new alphabet_mapper_none();
+ static_bitsequence_builder * bmb = new static_bitsequence_builder_rrr02(samp);
+ cout << "Building Huffman table..."; cout.flush();
+ wt_coder * wtc = new wt_coder_huff(text,n,am);
+ cout << "done" << endl; cout.flush();
+ static_sequence * wt = new static_sequence_wvtree(text,n,wtc,bmb,am,true);
+ delete bmb;
+
+ char * fname = new char[10+string(argv[1]).length()];
+ sprintf(fname,"%s.wt",argv[1]);
+ fp = fopen(fname,"w");
+ cout << "save: " << wt->save(fp) << endl;
+ fclose(fp);
+ delete wt;
+ fp = fopen(fname,"r");
+ wt = static_sequence::load(fp);
+ fclose(fp);
+ delete [] fname;
+
+ ((static_sequence_wvtree*)wt)->test_structure(text,n);
+
+ cout << "WT Size: " << wt->size() << endl;
+ cout << "ft = " << 1.*wt->size()/n << endl;
+
+ fname = new char[10+string(argv[1]).length()];
+ sprintf(fname,"%s.wt",argv[1]);
+ fp = fopen(fname,"w");
+ cout << "save: " << wt->save(fp) << endl;
+ fclose(fp);
+ delete [] fname;
+
+ delete [] text;
+ delete wt;
+
+}
--- /dev/null
+\r
+\r
+#include "XMLTree.h"\r
+\r
+\r
+void traverseXML(XMLTree *X, treeNode x)\r
+ {\r
+ int j;\r
+ int d = X->NumChildren(x);\r
+ treeNode y;\r
+ \r
+ y = X->FirstChild(x);\r
+ for (j = 0; j < d; j++) {\r
+ traverseXML(X, y);\r
+ y = X->NextSibling(y);\r
+ } \r
+ }\r
+\r
+\r
+int main() \r
+ {\r
+ XMLTree *X, *Y;\r
+ int rr, n, i;\r
+ unsigned char openTag[]="A", closeTag[]="/A", filename[]="testXML";\r
+ treeNode x;\r
+\r
+ n = 4999999;\r
+\r
+ X = new XMLTree();\r
+\r
+ \r
+\r
+ X->OpenDocument(false, 32);\r
+\r
+ rr = 0;\r
+ for (i = 1; i < n; i++) {\r
+ //if (rand() % 100 < r) setbit(B,i,1);\r
+ if (rr > (n-i)) {\r
+ printf("???? rr=%d n=%d i=%d\n",rr,n,i);\r
+ //exit(1);\r
+ } else if (rr >= (n-i)) {\r
+ X->NewClosingTag(closeTag);\r
+ if ((1+(int) (2.0*rand()/(RAND_MAX+1.0))) == 1)\r
+ X->NewEmptyText();\r
+ else X->NewText(NULL); // just a test, NULL string\r
+ rr--;\r
+ } \r
+ else {\r
+ if (rand() % (rr+1) < (rr+1)/2 || (rr<=1)) {\r
+ X->NewOpenTag(openTag);\r
+ if ((1+(int) (2.0*rand()/(RAND_MAX+1.0))) == 1)\r
+ X->NewEmptyText();\r
+ else X->NewText(NULL);\r
+ rr++;\r
+ } \r
+ else {\r
+ X->NewClosingTag(closeTag);\r
+ if ((1+(int) (2.0*rand()/(RAND_MAX+1.0))) <= 1)\r
+ X->NewEmptyText();\r
+ else X->NewText(NULL);\r
+ rr--;\r
+ }\r
+ }\r
+ }\r
+ \r
+ X->CloseDocument();\r
+ \r
+ for (i=0; i <= (n+1)/2; i++) \r
+ X->TextXMLId(i);\r
+ \r
+ x = X->Root();\r
+ \r
+ traverseXML(X, x); \r
+\r
+ X->Save(filename);\r
+\r
+ delete X;\r
+\r
+ X = XMLTree::Load(filename, 32);\r
+ \r
+ x = X->Root();\r
+ \r
+ traverseXML(X, x); \r
+ \r
+ }\r
+\r