From: kim Date: Mon, 13 Feb 2012 14:30:23 +0000 (+0000) Subject: Import libcds. X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=refs%2Fheads%2Ftrunk;p=SXSI%2Flibcds.git Import libcds. git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/libcds@1205 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Doxyfile b/Doxyfile new file mode 100644 index 0000000..aab437f --- /dev/null +++ b/Doxyfile @@ -0,0 +1,297 @@ +# Doxyfile 1.5.5 + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = libcds +PROJECT_NUMBER = 0.8 +OUTPUT_DIRECTORY = docs/ +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = src/ +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +DETAILS_AT_TOP = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 2 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +DISTRIBUTE_GROUP_DOC = NO +SUBGROUPING = YES +TYPEDEF_HIDES_STRUCT = NO +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = YES +EXTRACT_PRIVATE = YES +EXTRACT_STATIC = YES +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = NO +EXTRACT_ANON_NSPACES = NO +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO 
+INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = YES +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_GROUP_NAMES = NO +SORT_BY_SCOPE_NAME = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +SHOW_DIRECTORIES = NO +FILE_VERSION_FILTER = +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = src +INPUT_ENCODING = UTF-8 +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.d \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.idl \ + *.odl \ + *.cs \ + *.php \ + *.php3 \ + *.inc \ + *.m \ + *.mm \ + *.dox \ + *.py \ + *.f90 \ + *.f \ + *.vhd \ + *.vhdl \ + *.C \ + *.CC \ + *.C++ \ + *.II \ + *.I++ \ + *.H \ + *.HH \ + *.H++ \ + *.CS \ + *.PHP \ + *.PHP3 \ + *.M \ + *.MM \ + *.PY \ + *.F90 \ + *.F \ + *.VHD \ + *.VHDL +RECURSIVE = YES +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXCLUDE_SYMBOLS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = * +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = YES 
+INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +REFERENCES_LINK_SOURCE = YES +USE_HTAGS = NO +VERBATIM_HEADERS = NO +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = NO +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_ALIGN_MEMBERS = YES +GENERATE_HTMLHELP = NO +GENERATE_DOCSET = NO +DOCSET_FEEDNAME = "Doxygen generated docs" +DOCSET_BUNDLE_ID = org.doxygen.Project +HTML_DYNAMIC_SECTIONS = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +BINARY_TOC = NO +TOC_EXPAND = NO +DISABLE_INDEX = NO +ENUM_VALUES_PER_LINE = 4 +GENERATE_TREEVIEW = NO +TREEVIEW_WIDTH = 250 +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = YES +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4wide +EXTRA_PACKAGES = +LATEX_HEADER = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = 
+#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = NO +XML_OUTPUT = xml +XML_SCHEMA = +XML_DTD = +XML_PROGRAMLISTING = YES +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool 
+#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = NO +MSCGEN_PATH = +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = YES +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = YES +CALLER_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +DOT_PATH = +DOTFILE_DIRS = +DOT_GRAPH_MAX_NODES = 50 +MAX_DOT_GRAPH_DEPTH = 1000 +DOT_TRANSPARENT = YES +DOT_MULTI_TARGETS = NO +GENERATE_LEGEND = YES +DOT_CLEANUP = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- +SEARCHENGINE = NO diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..678a108 --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ + +all: clean libcompact + + +doc: + @echo " [DOC] Generating documentation" + @doxygen + +libcompact: + @echo " [MSG] Entering directory src" + @make --no-print-directory -C src + +tests: libcompact + @echo " [MSG] Entering directory tests" + @make --no-print-directory -C tests + +clean: + @echo " [MSG] Entering directory src" + @make --no-print-directory -C src clean + @echo " [MSG] Entering directory tests" + @make --no-print-directory -C tests clean + @echo " [CLN] Cleaning docs folder" + @rm -rf docs/* + @touch docs/delete_me + @echo " [CLN] Cleaning lib folder" + @rm -f lib/* + @touch lib/delete_me + @echo " [CLN] Cleaning includes folder" + @rm -f includes/* + @touch includes/delete_me + + diff --git a/docs/delete_me b/docs/delete_me new file mode 100644 index 0000000..e69de29 diff --git a/includes/delete_me b/includes/delete_me new file mode 100644 index 0000000..e69de29 diff --git a/lib/delete_me b/lib/delete_me new file mode 100644 index 0000000..e69de29 diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 
0000000..aaec1e9 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,40 @@ +CPP=g++ + +CPPFLAGS=-O3 -Wall -DNDEBUG -fno-PIC + +INCL=-I../includes/ + +CODERS_DIR=coders +CODERS_OBJECTS=$(CODERS_DIR)/huff.o $(CODERS_DIR)/huffman_codes.o + +STATIC_PERMUTATION_DIR=static_permutation +STATIC_PERMUTATION_OBJECTS=$(STATIC_PERMUTATION_DIR)/perm.o $(STATIC_PERMUTATION_DIR)/static_permutation.o $(STATIC_PERMUTATION_DIR)/static_permutation_mrrr.o $(STATIC_PERMUTATION_DIR)/static_permutation_builder_mrrr.o + +STATIC_BITSEQUENCE_DIR=static_bitsequence +STATIC_BITSEQUENCE_OBJECTS=$(STATIC_BITSEQUENCE_DIR)/static_bitsequence.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_naive.o $(STATIC_BITSEQUENCE_DIR)/table_offset.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_brw32.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_brw32.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_rrr02_light.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_rrr02_light.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_sdarray.o $(STATIC_BITSEQUENCE_DIR)/sdarray.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_sdarray.o + +STATIC_SEQUENCE_DIR=static_sequence +STATIC_SEQUENCE_OBJECTS=$(STATIC_SEQUENCE_DIR)/static_sequence.o $(STATIC_SEQUENCE_DIR)/static_sequence_wvtree.o $(STATIC_SEQUENCE_DIR)/wt_coder_binary.o $(STATIC_SEQUENCE_DIR)/wt_coder_huff.o $(STATIC_SEQUENCE_DIR)/wt_node_internal.o $(STATIC_SEQUENCE_DIR)/wt_node_leaf.o $(STATIC_SEQUENCE_DIR)/wt_coder.o $(STATIC_SEQUENCE_DIR)/wt_node.o $(STATIC_SEQUENCE_DIR)/static_sequence_gmr_chunk.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_gmr_chunk.o $(STATIC_SEQUENCE_DIR)/static_sequence_gmr.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_wvtree.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_gmr.o $(STATIC_SEQUENCE_DIR)/static_sequence_wvtree_noptrs.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_wvtree_noptrs.o 
$(STATIC_SEQUENCE_DIR)/static_sequence_bs.o + +UTILS_DIR=utils +UTILS_OBJECTS=$(UTILS_DIR)/alphabet_mapper_none.o $(UTILS_DIR)/alphabet_mapper.o $(UTILS_DIR)/alphabet_mapper_cont.o + +%.o: %.cpp + @echo " [C++] Compiling $<" + @$(CPP) $(CPPFLAGS) $(INCL) -c $< -o $@ + +all: lib + +clean: + @echo " [CLN] Removing object files" + @rm -f $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS) $(STATIC_PERMUTATION_OBJECTS) + +lib: pre $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS) $(STATIC_PERMUTATION_OBJECTS) + @echo " [LIB] Packing the object files" + @ar rcs ../lib/libcds.a $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS) $(STATIC_PERMUTATION_OBJECTS) + +pre: + @echo " [HDR] Populating the includes folder" + @cp basics.h ../includes/ + @cp */*.h ../includes/ + diff --git a/src/basics.h b/src/basics.h new file mode 100644 index 0000000..85bfe22 --- /dev/null +++ b/src/basics.h @@ -0,0 +1,262 @@ +/* basics.h + * Copyright (C) 2005, Rodrigo Gonzalez, all rights reserved. + * + * Some preliminary stuff + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#ifndef _BASICS_H +#define _BASICS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +////using namespace std; +#include +#include + + +/** mask for obtaining the first 5 bits */ +#define mask31 0x0000001F + +/** max function */ +//#define max(x,y) ((x)>(y)?(x):(y)) +/** min function */ +//#define min(x,y) ((x)<(y)?(x):(y)) + + +/** number of bits in a uint */ +#undef W +#define W 32 + +/** W-1 */ +#undef Wminusone +#define Wminusone 31 + +/** 2W*/ +#undef WW +#define WW 64 + +/** number of bits per uchar */ +#define bitsM 8 + +/** number of bytes per uint */ +#define BW 4 + +/** uchar = unsigned char */ +#ifndef uchar +#define uchar unsigned char +#endif + +/** ushort = unsigned short */ +#ifndef ushort +#define ushort unsigned short +#endif + +/** ulong = unsigned long */ +#ifndef ulong +#define ulong unsigned long +#endif + +/** uint = unsigned int */ +#ifndef uint +#define uint unsigned int +#endif + +/** number of different uchar values 0..255 */ +#define size_uchar 256 + +/** popcount array for uchars */ +const unsigned char __popcount_tab[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 
4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; + +/** select array for uchars */ +const unsigned char select_tab[] = { + 0, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, + 6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, + 7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, + 6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, + 8, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, + 6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, + 7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, + 6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, +}; + +/** prev array for uchars */ +const unsigned char prev_tab[] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, +}; + + + +/** bits needed to represent a number between 0 and n */ +inline uint bits(uint n){ + uint b = 0; + while (n) { b++; n >>= 1; } + return 
b; +} + +/** reads bit p from e */ +#define bitget(e,p) ((((e)[(p)/W] >> ((p)%W))) & 1) + +/** sets bit p in e */ +#define bitset(e,p) ((e)[(p)/W] |= (1<<((p)%W))) + +/** cleans bit p in e */ +#define bitclean(e,p) ((e)[(p)/W] &= ~(1<<((p)%W))) + +/** uints required to represent e integers of n bits each */ +//#define uint_len(e,n) (((e)*(n))/W+(((e)*(n))%W > 0)) +inline uint uint_len(uint e, uint n) { + return ((unsigned long long)e*n/W+((unsigned long long)e*n%W>0)); +} + +/** Retrieve a given index from array A where every value uses len bits + * @param A Array + * @param len Length in bits of each field + * @param index Position to be retrieved + */ +inline uint get_field(uint *A, uint len, uint index) { + if(len==0) return 0; + register uint i=index*len/W, j=index*len-W*i, result; + if (j+len <= W) + result = (A[i] << (W-j-len)) >> (W-len); + else { + result = A[i] >> j; + result = result | (A[i+1] << (WW-j-len)) >> (W-len); + } + return result; +} + +/** Store a given value in index into array A where every value uses len bits + * @param A Array + * @param len Length in bits of each field + * @param index Position to store in + * @param x Value to be stored + */ +inline void set_field(uint *A, uint len, uint index, uint x) { + if(len==0) return; + uint i=index*len/W, j=index*len-i*W; + uint mask = ((j+len) < W ? ~0u << (j+len) : 0) + | ((W-j) < W ? 
~0u >> (W-j) : 0); + A[i] = (A[i] & mask) | x << j; + if (j+len>W) { + mask = ((~0u) << (len+j-W)); + A[i+1] = (A[i+1] & mask)| x >> (W-j); + } +} + +/** Retrieve a given bitsequence from array A + * @param A Array + * @param ini Starting position + * @param fin Retrieve until end-1 + */ +inline uint get_var_field(uint *A, uint ini, uint fin) { + if(ini==fin+1) return 0; + uint i=ini/W, j=ini-W*i, result; + uint len = (fin-ini+1); + if (j+len <= W) + result = (A[i] << (W-j-len)) >> (W-len); + else { + result = A[i] >> j; + result = result | (A[i+1] << (WW-j-len)) >> (W-len); + } + return result; +} + +/** Stores a given bitsequence into array A + * @param A Array + * @param ini Starting position + * @param fin Store until end-1 + * @param x Value to be stored + */ +inline void set_var_field(uint *A, uint ini, uint fin, uint x) { + if(ini==fin+1) return; + uint i=ini/W, j=ini-i*W; + uint len = (fin-ini+1); + uint mask = ((j+len) < W ? ~0u << (j+len) : 0) + | ((W-j) < W ? ~0u >> (W-j) : 0); + A[i] = (A[i] & mask) | x << j; + if (j+len>W) { + mask = ((~0u) << (len+j-W)); + A[i+1] = (A[i+1] & mask)| x >> (W-j); + } +} + +/** Retrieve a given index from array A where every value uses 4 bits + * @param A Array + * @param index Position to be retrieved + */ +inline uint get_field4(uint *A, uint index) { + unsigned i=index/8, j=(index&0x7)<<2; + return (A[i] << (28-j)) >> (28); +} + +/** Counts the number of 1s in x */ +inline uint popcount(int x){ + return __popcount_tab[(x >> 0) & 0xff] + __popcount_tab[(x >> 8) & 0xff] + + __popcount_tab[(x >> 16) & 0xff] + __popcount_tab[(x >> 24) & 0xff]; +} +inline unsigned int +fast_popcount(int x) +{ + uint m1 = 0x55555555; + uint m2 = 0x33333333; + uint m4 = 0x0f0f0f0f; + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + x += x >> 8; + return (x + (x >> 16)) & 0x3f; +} + + + +/** Counts the number of 1s in the first 16 bits of x */ +inline uint popcount16(int x){ + return __popcount_tab[x & 0xff] + 
__popcount_tab[(x >> 8) & 0xff]; +} + +/** Counts the number of 1s in the first 8 bits of x */ +inline uint popcount8(int x){ + return __popcount_tab[x & 0xff]; +} + +#endif /* _BASICS_H */ + diff --git a/src/coders/huff.cpp b/src/coders/huff.cpp new file mode 100644 index 0000000..52b95f9 --- /dev/null +++ b/src/coders/huff.cpp @@ -0,0 +1,257 @@ +/* huff.cpp + Copyright (C) 2008, Gonzalo Navarro, all rights reserved. + + Canonical Huffman + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ +// implements canonical Huffman + +#include + +typedef struct + { uint freq; + uint symb; + union + { int prev; + uint depth; + } h; + int ch1,ch2; + } Ttree; + +static void sort (Ttree *tree, int lo, int up) + + { uint i, j; + Ttree temp; + while (up>lo) + { i = lo; + j = up; + temp = tree[lo]; + while (i temp.freq) j--; + tree[i] = tree[j]; + while (i0) + { tree[j].freq = freq[i]; + tree[j].symb = i; + j++; + } + } + H.lim = lim = j-1; + // now run Huffman algorithm + sort (tree,0,lim); + for (i=0;i<=(int)lim;i++) + { tree[i].h.prev = i+1; + tree[i].ch1 = tree[i].ch2 = -1; + } + tree[lim].h.prev = -1; + // last = next node to process, ptr = search point, fre = next free cell + // leaves are in 0..lim in decreasing freq order + // internal nodes are in lim+1.. 2*lim, created in incr. 
fre order + last=0; ptr = 0; fre = lim+1; + for (i=0;i<(int)lim;i++) + { tree[fre].ch1 = last; + last = tree[last].h.prev; + tree[fre].ch2 = last; + tree[fre].freq = tree[tree[fre].ch1].freq+tree[tree[fre].ch2].freq; + while ((tree[ptr].h.prev != -1) && + (tree[tree[ptr].h.prev].freq <= tree[fre].freq)) + ptr = tree[ptr].h.prev; + tree[fre].h.prev = tree[ptr].h.prev; + tree[ptr].h.prev = fre; + last = tree[last].h.prev; + fre++; + } + // now assign depths recursively + setdepths (tree,2*lim,0); + H.s.spos = (uint*)malloc ((H.max+1)*sizeof(uint)); + for (i=0;i<=(int)H.max;i++) H.s.spos[i] = ~0; + H.num = (uint*)malloc ((lim+1)*sizeof(uint)); // max possible depth + d=0; + for (i=lim;i>=0;i--) + { H.s.spos[tree[i].symb] = i; + while ((int)tree[i].h.depth > d) + { H.num[d] = i+1; d++; } + } + H.num[d] = 0; + H.depth = d; + for (d=H.depth;d>0;d--) H.num[d] = H.num[d-1] - H.num[d]; + H.num[0] = (lim == 0); + H.num = (uint*)realloc(H.num,(H.depth+1)*sizeof(uint)); + H.total = 0; + for (i=0;i<=(int)lim;i++) + H.total += freq[tree[i].symb] * tree[i].h.depth; + free (tree); + return H; + } + +void bitzero (register uint *e, register uint p, + register uint len) + + { e += p/W; p %= W; + if (p+len >= W) + { *e &= ~((1<= W) + { *e++ = 0; + len -= W; + } + if (len > 0) + *e &= ~(((1<= H.num[d]) + { code = (code + H.num[d]) >> 1; + pos -= H.num[d--]; + } + code += pos; + if (d > W) { bitzero(stream,ptr,d-W); ptr += d-W; d = W; } + while (d--) + { if ((code >> d) & 1) bitset(stream,ptr); + else bitclean(stream,ptr); + ptr++; + } + return ptr; + } + +ulong decodeHuff (THuff H, uint *symb, uint *stream, ulong ptr) + + { uint pos; + uint d; + pos = 0; + d = 0; + while (pos < H.fst[d]) + { pos = (pos << 1) | bitget(stream,ptr); + ptr++; d++; + } + *symb = H.s.symb[H.num[d]+pos-H.fst[d]]; + return ptr; + } + +/* { uint pos; // This "improved" code is actually slower! 
+ int d; + uint wrd,off; + stream += ptr/W; + off = ptr & (W-1); + wrd = *stream >> off; + pos = 0; + d = 0; + while (pos < H.fst[d]) + { pos = (pos << 1) | (wrd & 1); + d++; wrd >>= 1; off++; + if (off == W) { wrd = *++stream; off = 0; } + } + *symb = H.s.symb[H.num[d]+pos-H.fst[d]]; + return ptr+d; + } +*/ +void saveHuff (THuff H, FILE *f) + + { uint *symb = new uint[H.lim+1]; + uint i; + for(i=0;i<(H.lim+1);i++) symb[i] = 0; + for (i=0;i<=H.max;i++) + if (H.s.spos[i] != (uint)~0) symb[H.s.spos[i]] = i; + uint l=fwrite (&H.max,sizeof(uint),1,f); + l += fwrite (&H.lim,sizeof(uint),1,f); + l += fwrite (&H.depth,sizeof(uint),1,f); + l += fwrite (symb,sizeof(uint),H.lim+1,f); + l += fwrite (H.num,sizeof(uint),H.depth+1,f); + delete [] (symb); + } + +uint sizeHuff (THuff H) + + { return (4+(H.lim+1)+(H.depth+1))*sizeof(uint); + } + +void freeHuff (THuff H) + + { free (H.s.spos); free (H.num); + } + +THuff loadHuff (FILE *f, int enc) + + { THuff H; + uint *symb; + //uint *num; + uint i,d,dold,dact; + uint l = fread (&H.max,sizeof(uint),1,f); + l += fread (&H.lim,sizeof(uint),1,f); + l += fread (&H.depth,sizeof(uint),1,f); + symb = (uint*)malloc ((H.lim+1)*sizeof(uint)); + l += fread (symb,sizeof(uint),H.lim+1,f); + if (enc) + { H.s.spos = (uint*)malloc ((H.max+1)*sizeof(uint)); + for (i=0;i<=H.max;i++) H.s.spos[i] = (uint)~0; + for (i=0;i<=H.lim;i++) H.s.spos[symb[i]] = i; + free (symb); + } + else H.s.symb = symb; + H.num = (uint*)malloc ((H.depth+1)*sizeof(uint)); + l += fread (H.num,sizeof(uint),H.depth+1,f); + if (!enc) + { H.fst = (uint*)malloc ((H.depth+1)*sizeof(uint)); + H.fst[H.depth] = 0; dold = 0; + for (d=H.depth-1;d>=0;d--) + { dact = H.num[d+1]; + H.fst[d] = (H.fst[d+1]+dact) >> 1; + H.num[d+1] = dold; + dold += dact; + } + H.num[0] = dold; + } + return H; + } diff --git a/src/coders/huff.h b/src/coders/huff.h new file mode 100644 index 0000000..e990974 --- /dev/null +++ b/src/coders/huff.h @@ -0,0 +1,86 @@ +/* huff.h + Copyright (C) 2008, Gonzalo 
Navarro, all rights reserved. + + Canonical Huffman + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#ifndef HUFFINCLUDED +#define HUFFINCLUDED + +#include + +typedef struct + { uint max,lim; // maximum symbol (0..max), same excluding zero freqs + uint depth; // max symbol length + union + { uint *spos; // symbol positions after sorting by decr freq (enc) + uint *symb; // symbols sorted by freq (dec) + } s; + uint *num; // first pos of each length (dec), number of each length (enc) + uint *fst; // first code (numeric) of each length (dec) + ulong total; // total length to achieve, in bits + } THuff; + + +/** Creates Huffman encoder given symbols 0..lim with frequencies + * freq[i], ready for compression + * + * @author Gonzalo Navarro + */ +THuff createHuff (uint *freq, uint lim); + +/** Encodes symb using H, over stream[ptr...lim] (ptr and lim are + * bit positions of stream). Returns the new ptr. + * + * @author Gonzalo Navarro + */ +ulong encodeHuff (THuff H, uint symb, uint *stream, ulong ptr); + +/** Decodes *symb using H, over stream[ptr...lim] (ptr and lim are + * bit positions of stream). Returns the new ptr. 
+ * + * @author Gonzalo Navarro + */ +ulong decodeHuff (THuff H, uint *symb, uint *stream, ulong ptr); + +/** Writes H in file f + * + * @author Gonzalo Navarro + */ +void saveHuff (THuff H, FILE *f); + +/** Size of H written on file + * + * @author Gonzalo Navarro + */ +uint sizeHuff (THuff H); + +/** Frees H + * + * @author Gonzalo Navarro + */ +void freeHuff (THuff H); + +/** Loads H from file f, prepared for encoding or decoding depending + * on enc + * + * @author Gonzalo Navarro + */ +THuff loadHuff (FILE *f, int enc); + +#endif diff --git a/src/coders/huffman_codes.cpp b/src/coders/huffman_codes.cpp new file mode 100644 index 0000000..d11458d --- /dev/null +++ b/src/coders/huffman_codes.cpp @@ -0,0 +1,83 @@ +/* huffman_codes.cpp + Copyright (C) 2008, Francisco Claude, all rights reserved. + + Wrapper for huff written by Gonzalo Navarro + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#include +using std::max; + +huffman_codes::huffman_codes(uint * symb, uint n) { + uint max_v = 0; + for(uint i=0;ihuff_table = loadHuff(fp,1); + return ret; +} diff --git a/src/coders/huffman_codes.h b/src/coders/huffman_codes.h new file mode 100644 index 0000000..7538225 --- /dev/null +++ b/src/coders/huffman_codes.h @@ -0,0 +1,63 @@ +/* huffman_codes.h + Copyright (C) 2008, Francisco Claude, all rights reserved. + + Wrapper for huff written by Gonzalo Navarro + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#ifndef HUFFMAN_CODES_H +#define HUFFMAN_CODES_H + +#include +#include + +/** Wrapper for the canonical huffman implementation of Gonzalo Navarro. 
+ * + * @author Francisco Claude + */ +class huffman_codes { + + public: + /** Creates the codes for the sequence seq of length n */ + huffman_codes(uint * seq, uint n); + huffman_codes(uchar * seq, uint n); + ~huffman_codes(); + + /** Encodes symb into stream at bit-position pos, return the ending position (bits) */ + ulong encode(uint symb, uint * stream, ulong pos); + + /** decodes into symb from stream at bit-position pos, returns the new position */ + ulong decode(uint * symb, uint * stream, ulong pos); + + /** Returns the maximum length of a code */ + uint max_length(); + + /** Returns the size of the table */ + uint size(); + + /** Saves the coder to a file */ + uint save(FILE *fp); + + /** Loads a coder from a file */ + static huffman_codes * load(FILE *fp); + + protected: + huffman_codes(); + THuff huff_table; +}; + +#endif diff --git a/src/static_bitsequence/sdarray.cpp b/src/static_bitsequence/sdarray.cpp new file mode 100644 index 0000000..4fe52b2 --- /dev/null +++ b/src/static_bitsequence/sdarray.cpp @@ -0,0 +1,831 @@ + +#include +using std::min; +using std::max; +#if 0 +typedef unsigned int qword; +#define logD 4 +#else +typedef unsigned long long qword; +#define logD 5 +#endif +#define PBS (sizeof(uint)*8) +#define D (1<0) { + x>>=1; + l++; + } + return l; +} + + +int __setbit(uint *B, int i,int x) { + int j,l; + //printf("%u\n",D); + j = i / D; + l = i % D; + if (x==0) B[j] &= (~(1<<(D-1-l))); + else if (x==1) B[j] |= (1<<(D-1-l)); + else { + printf("error __setbit x=%d\n",x); + exit(1); + } + return x; +} + + +int __setbit2(uchar *B, int i,int x) { + int j,l; + + j = i / 8; + l = i % 8; + if (x==0) B[j] &= (~(1<<(8-1-l))); + else if (x==1) B[j] |= (1<<(8-1-l)); + else { + printf("error __setbit2 x=%d\n",x); + exit(1); + } + return x; +} + + +int __setbits(uint *B, int i, int d, int x) { + int j; + + for (j=0; j>(d-j-1))&1); + } + return x; +} + + +int __getbit(uint *B, int i) { + int j,l; + + //j = i / D; + //l = i % D; + j = i >> logD; + l = i & 
(D-1); + return (B[j] >> (D-1-l)) & 1; +} + + +int __getbit2(uchar *B, int i) { + int j,l; + + //j = i / D; + //l = i % D; + j = i >> 3; + l = i & (8-1); + return (B[j] >> (8-1-l)) & 1; +} + + +#if 1 +uint __getbits(uint *B, int i, int d) { + qword x,z; + + B += (i >> logD); + i &= (D-1); + if (i+d <= 2*D) { + x = (((qword)B[0]) << D) + B[1]; + x <<= i; + x >>= (D*2-1-d); + x >>= 1; + } + else { + x = (((qword)B[0])<>= D; + x += z; + x >>= (2*D-d); + } + + return x; +} +#endif + +#if 0 +uint __getbits(uint *B, int i, int d) { + uint j,x; + + x = 0; + for (j=0; j> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + x += x >> 8; + return (x + (x >> 16)) & 0x3f; +} + +static inline unsigned int +_fast_popcount3(int x) +{ + uint m1 = 0x55555555; + uint m2 = 0xc30c30c3; + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2) + ((x >> 4) & m2); + x += x >> 6; + return (x + (x >> 12) + (x >> 24)) & 0x3f; +} + +static inline unsigned int +_fast_popcount(int x) { + return _popCount[x]; +} + +static unsigned int __selecttbl[8*256]; +static int built = 0; + +void make___selecttbl(void) { + if(built) return; + built = 1; + int i,x,r; + uint buf[1]; + + for (x = 0; x < 256; x++) { + __setbits(buf,0,8,x); + for (r=0; r<8; r++) __selecttbl[(r<<8)+x] = -1; + r = 0; + for (i=0; i<8; i++) { + if (__getbit(buf,i)) { + __selecttbl[(r<<8)+x] = i; + r++; + } + } + } +} + + +unsigned int __popCount(uint x) { + uint r; + #if 0 + r = x; + r = r - ((r>>1) & 0x77777777) - ((r>>2) & 0x33333333) - ((r>>3) & 0x11111111); + r = ((r + (r>>4)) & 0x0f0f0f0f) % 0xff; + #elif 1 + r = x; + r = ((r & 0xaaaaaaaa)>>1) + (r & 0x55555555); + r = ((r & 0xcccccccc)>>2) + (r & 0x33333333); + //r = ((r & 0xf0f0f0f0)>>4) + (r & 0x0f0f0f0f); + r = ((r>>4) + r) & 0x0f0f0f0f; + //r = ((r & 0xff00ff00)>>8) + (r & 0x00ff00ff); + r = (r>>8) + r; + //r = ((r & 0xffff0000)>>16) + (r & 0x0000ffff); + r = ((r>>16) + r) & 63; + #else + r = _popCount[x & 0xff]; + x >>= 8; + r += _popCount[x & 0xff]; + x 
>>= 8; + r += _popCount[x & 0xff]; + x >>= 8; + r += _popCount[x & 0xff]; + #endif + return r; +} + + +unsigned int __popCount8(uint x) { + uint r; + #if 1 + r = x; + r = ((r & 0xaa)>>1) + (r & 0x55); + r = ((r & 0xcc)>>2) + (r & 0x33); + r = ((r>>4) + r) & 0x0f; + #else + r = _popCount[x & 0xff]; + #endif + return r; +} + + +int selectd2_save(selectd2 * s, FILE * fp) { + uint wr = 0; + wr += fwrite(&s->n,sizeof(uint),1,fp); + wr += fwrite(&s->m,sizeof(uint),1,fp); + wr += fwrite(&s->size,sizeof(uint),1,fp); + wr += fwrite(&s->ss_len,sizeof(uint),1,fp); + wr += fwrite(&s->sl_len,sizeof(uint),1,fp); + wr += fwrite(s->buf,sizeof(uchar),(s->n+7)/8+1,fp); + uint nl = (s->m-1) / L + 1; + wr += fwrite(s->lp,sizeof(uint),nl+1,fp); + wr += fwrite(s->p,sizeof(uint),nl+1,fp); + wr += fwrite(s->ss,sizeof(ushort),s->ss_len,fp); + wr += fwrite(s->sl,sizeof(uint),s->sl_len,fp); + if(wr!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5) + return 1; + return 0; +} + + +int selectd2_load(selectd2 * s, FILE * fp) { + uint rd = 0; + rd += fread(&s->n,sizeof(uint),1,fp); + rd += fread(&s->m,sizeof(uint),1,fp); + rd += fread(&s->size,sizeof(uint),1,fp); + rd += fread(&s->ss_len,sizeof(uint),1,fp); + rd += fread(&s->sl_len,sizeof(uint),1,fp); + s->buf = new uchar[(s->n+7)/8+1]; + rd += fread(s->buf,sizeof(uchar),(s->n+7)/8+1,fp); + uint nl = (s->m-1) / L + 1; + s->lp = new uint[nl+1]; + rd += fread(s->lp,sizeof(uint),nl+1,fp); + s->p = new uint[nl+1]; + rd += fread(s->p,sizeof(uint),nl+1,fp); + s->ss = new ushort[s->ss_len]; + rd += fread(s->ss,sizeof(ushort),s->ss_len,fp); + s->sl = new uint[s->sl_len]; + rd += fread(s->sl,sizeof(uint),s->sl_len,fp); + if(rd!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5) + return 1; + return 0; +} + + +void selectd2_free(selectd2 * s) { + //delete [] s->buf; + delete [] s->lp; + delete [] s->p; + delete [] s->ss; + delete [] s->sl; +} + + +int selectd2_construct(selectd2 *select, int n, uchar *buf) { + int i,m; + int nl; + int p,pp; + int il,is,ml,ms; + int 
r; + uint *s; + + make___selecttbl(); + + if (L/LLL == 0) { + printf("ERROR: L=%d LLL=%d\n",L,LLL); + exit(1); + } + + m = 0; + for (i=0; in = n; + select->m = m; + //printf("n=%d m=%d\n",n,m); + + select->buf = buf; + + s = new uint[m]; + m = 0; + for (i=0; isize = 0; //ignoring buf, shared with selects3 + select->lp = new uint[nl+1]; + for(int k=0;klp[k]=0; + select->size += (nl+1)*sizeof(uint); + select->p = new uint[nl+1]; + for(int k=0;kp[k]=0; + select->size += (nl+1)*sizeof(uint); + + for (r = 0; r < 2; r++) { + ml = ms = 0; + for (il = 0; il < nl; il++) { + pp = s[il*L]; + select->lp[il] = pp; + i = min((il+1)*L-1,m-1); + p = s[i]; + //printf("%d ",p-pp); + if (p - pp >= LL) { + if (r == 1) { + for (is = 0; is < L; is++) { + if (il*L+is >= m) break; + select->sl[ml*L+is] = s[il*L+is]; + } + } + select->p[il] = -((ml<= m) break; + select->ss[ms*(L/LLL)+is] = s[il*L+is*LLL] - pp; + } + } + select->p[il] = ms << (logL-logLLL); + ms++; + } + } + if (r == 0) { + select->sl = new uint[ml*L+1]; + for(int k=0;ksl[k]=0; + select->size += sizeof(uint)*(ml*L+1); + select->sl_len = ml*L+1; + select->ss = new ushort[ms*(L/LLL)+1]; + for(int k=0;kss[k]=0; + select->ss_len = ms*(L/LLL)+1; + select->size += sizeof(ushort)*(ms*(L/LLL)+1); + } + } + delete [] s; + return 0; +} + + +int selectd2_select(selectd2 *select, int i,int f) { + int p,r; + int il; + int rr; + uchar *q; + + if (i == 0) return -1; + + #if 0 + if (i > select->m) { + printf("ERROR: m=%d i=%d\n",select->m,i); + exit(1); + } + #endif + + i--; + + il = select->p[i>>logL]; + if (il < 0) { + il = -il-1; + //p = select->sl[(il<sl[il+(i & (L-1))]; + } + else { + p = select->lp[i>>logL]; + //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + p += select->ss[il+((i & (L-1))>>logLLL)]; + r = i - (i & (LLL-1)); + + q = &(select->buf[p>>3]); + + if (f == 1) { + rr = p & (8-1); + //r -= _popCount[*q >> (8-1-rr)]; + r -= _fast_popcount(*q >> (8-1-rr)); + //p = p - rr; + + while (1) { + //rr = _popCount[*q]; + rr = 
_fast_popcount(*q); + if (r + rr >= i) break; + r += rr; + //p += 8; + q++; + } + p = (q - select->buf) << 3; + p += __selecttbl[((i-r-1)<<8)+(*q)]; + } + else { + rr = p & (8-1); + //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)]; + r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr)); + //p = p - rr; + + while (1) { + //rr = _popCount[*q ^ 0xff]; + rr = _fast_popcount(*q ^ 0xff); + if (r + rr >= i) break; + r += rr; + //p += 8; + q++; + } + p = (q - select->buf) << 3; + p += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; + } + } + return p; +} + + +int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) { + int p,r,p2; + int il; + int rr; + uchar *q; + + if (i == 0) { + *st = -1; + return -1; + } + + #if 0 + if (i > select->m) { + printf("ERROR: m=%d i=%d\n",select->m,i); + exit(1); + } + #endif + + i--; + + il = select->p[i>>logL]; + if (il < 0) { + il = -il-1; + //p = select->sl[(il<sl[il+(i & (L-1))]; + + if ((i>>logL) == ((i+1)>>logL)) { + p2 = select->sl[il+((i+1) & (L-1))]; + } + else { + p2 = selectd2_select(select,i+2,f); + } + } + else { + p = select->lp[i>>logL]; + //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + p += select->ss[il+((i & (L-1))>>logLLL)]; + r = i - (i & (LLL-1)); + + q = &(select->buf[p>>3]); + + if (f == 1) { + rr = p & (8-1); + //r -= _popCount[*q >> (8-1-rr)]; + r -= _fast_popcount(*q >> (8-1-rr)); + //p = p - rr; + + while (1) { + //rr = _popCount[*q]; + rr = _fast_popcount(*q); + if (r + rr >= i) break; + r += rr; + //p += 8; + q++; + } + p = (q - select->buf) << 3; + p += __selecttbl[((i-r-1)<<8)+(*q)]; + + if ((i>>logL) == ((i+1)>>logL)) { + i++; + while (1) { + //rr = _popCount[*q]; + r = _fast_popcount(*q); + if (r + rr >= i) break; + r += rr; + q++; + } + p2 = (q - select->buf) << 3; + p2 += __selecttbl[((i-r-1)<<8)+(*q)]; + } + else { + p2 = selectd2_select(select,i+2,f); + } + + } + else { + rr = p & (8-1); + //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)]; + r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr)); + //p = p - rr; + + while 
(1) { + //rr = _popCount[*q ^ 0xff]; + rr = _fast_popcount(*q ^ 0xff); + if (r + rr >= i) break; + r += rr; + //p += 8; + q++; + } + p = (q - select->buf) << 3; + p += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; + + if ((i>>logL) == ((i+1)>>logL)) { + i++; + while (1) { + //rr = _popCount[*q ^ 0xff]; + rr = _fast_popcount(*q ^ 0xff); + if (r + rr >= i) break; + r += rr; + q++; + } + p2 = (q - select->buf) << 3; + p2 += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; + } + else { + p2 = selectd2_select(select,i+2,f); + } + } + } + *st = p; + *en = p2; + return p; +} + + +int selects3_save(selects3 * s, FILE * fp) { + uint wr = 0; + wr += fwrite(&s->n,sizeof(uint),1,fp); + wr += fwrite(&s->m,sizeof(uint),1,fp); + wr += fwrite(&s->size,sizeof(uint),1,fp); + wr += fwrite(&s->d,sizeof(uint),1,fp); + wr += fwrite(&s->hi_len,sizeof(uint),1,fp); + wr += fwrite(&s->low_len,sizeof(uint),1,fp); + wr += fwrite(s->hi,sizeof(uchar),s->hi_len,fp); + wr += fwrite(s->low,sizeof(uint),s->low_len,fp); + if(wr!=(6+s->hi_len+s->low_len)) + return 1; + if(selectd2_save(s->sd0,fp)) return 2; + if(selectd2_save(s->sd1,fp)) return 3; + return 0; +} + + +int selects3_load(selects3 * s, FILE * fp) { + uint rd = 0; + rd += fread(&s->n,sizeof(uint),1,fp); + rd += fread(&s->m,sizeof(uint),1,fp); + rd += fread(&s->size,sizeof(uint),1,fp); + rd += fread(&s->d,sizeof(uint),1,fp); + rd += fread(&s->hi_len,sizeof(uint),1,fp); + rd += fread(&s->low_len,sizeof(uint),1,fp); + s->hi = new uchar[s->hi_len]; + rd += fread(s->hi,sizeof(uchar),s->hi_len,fp); + s->low = new uint[s->low_len]; + rd += fread(s->low,sizeof(uint),s->low_len,fp); + if(rd!=(6+s->hi_len+s->low_len)) + return 1; + s->sd0 = new selectd2; + if(selectd2_load(s->sd0,fp)) return 2; + s->sd1 = new selectd2; + if(selectd2_load(s->sd1,fp)) return 3; + delete [] s->sd0->buf; + delete [] s->sd1->buf; + s->sd0->buf = s->hi; + s->sd1->buf = s->hi; + return 0; +} + + +void selects3_free(selects3 * s) { + delete [] s->hi; + delete [] s->low; + //delete [] 
s->sd0->buf; + selectd2_free(s->sd0); + delete s->sd0; + selectd2_free(s->sd1); + delete s->sd1; +} + + +int selects3_construct(selects3 *select, int n, uint *buf) { + int i,m; + int d,mm; + uint *low; + uchar *buf2; + selectd2 *sd0,*sd1; + + m = 0; + for (i=0; in = n; + select->m = m; + + if (m == 0) return 0; + + mm = m; + d = 0; + while (mm < n) { + mm <<= 1; + d++; + } + + select->d = d; + + buf2 = new uchar[(2*m+8-1)/8+1]; + for(int k=0;k<(2*m+8-1)/8+1;k++) buf2[k]=0; + select->hi_len = (2*m+8-1)/8+1; + low = new uint[(d*m+PBS-1)/PBS+1]; + for(uint k=0;k<(d*m+PBS-1)/PBS+1;k++) low[k]=0; + select->low_len = (d*m+PBS-1)/PBS+1; + + select->hi = buf2; + select->low = low; + select->size = sizeof(uchar)*((2*m+8-1)/8+1) + sizeof(uint)*((d*m+PBS-1)/PBS+1); + + for (i=0; i>d)+m,1); + __setbits(low,m*d,d,i & ((1<size += 2*sizeof(selectd2); + + selectd2_construct(sd1,m*2,buf2); + select->sd1 = sd1; + + for (i=0; isd0 = sd0; + + for (i=0; i select->m) { + printf("ERROR: m=%d i=%d\n",select->m,i); + exit(1); + } + #endif + + if (i == 0) return -1; + + d = select->d; + /*if(select->lasti==(uint)i-1) { + while(!__getbit2(select->sd1->buf,++select->lasts)); + } + else { + select->lasts = selectd2_select(select->sd1,i,1); + } + select->lasti = i; + //lasts3 = select; */ + x = selectd2_select(select->sd1,i,1) - (i-1); + //x = (select->lasts-(i-1)) << d; + x <<= d; + x += __getbits(select->low,(i-1)*d,d); + return x; +} + + +int selects3_selectnext(selects3 *select, int i) { + //return selects3_select(select,selects3_rank(select,i)+1); + int d,x,w,y; + int r,j; + int z,ii; + uint *q; + d = select->d; + q = select->low; + ii = i>>d; + y = selectd2_select(select->sd0,ii,0)+1; + int k2=y-ii; + x = y - ii; + int x_orig = x; + j = i - (ii<>= 3; + z = select->hi[y]; + while (1) { + if (((z << r) & 0x80) == 0) { + if(x!=x_orig) k2++; + break; + } + w = __getbits(q,x*d,d); + if (w >= j) { + if (w == j) { + if(__getbit2(select->hi,(8*y+r))) k2++; + x++; + r++; + } + break; + } + x++; + 
r++; + if(__getbit2(select->hi,(8*y+r))) k2++; + if (r == 8) { + r = 0; + y++; + z = select->hi[y]; + } + } + if(x==select->m) + return (uint)-1; + int c=8*y+r; + int fin=0; + for(int kk=0;kk<8-r;kk++) { + if(__getbit2(select->hi,c)) { + fin=1; + break; + } + c++; + } + if(!fin) { + int pp = c/8; + while(select->hi[pp]==0) { + pp++; + c+=8; + } + while(!__getbit2(select->hi,c)) c++; + } + c -= (k2); + return __getbits(q,x*d,d)+((c)<d; + q = select->low; + + ii = i>>d; + + y = selectd2_select(select->sd0,ii,0)+1; + // selectd2_select2(select->sd0,ii,0,&y1,&y2); + //y1++; y2++; + //printf("y %d y1 %d %d\n",y,y1,y2-y1); + + x = y - ii; + + j = i - (ii<>= 3; + z = select->hi[y]; + while (1) { + if (((z << r) & 0x80) == 0) break; + w = __getbits(q,x*d,d); + if (w >= j) { + if (w == j) x++; + break; + } + x++; + r++; + if (r == 8) { + r = 0; + y++; + z = select->hi[y]; + } + } + + return x; +} + diff --git a/src/static_bitsequence/sdarray.h b/src/static_bitsequence/sdarray.h new file mode 100644 index 0000000..fe01200 --- /dev/null +++ b/src/static_bitsequence/sdarray.h @@ -0,0 +1,50 @@ + +#ifndef SDARRAY_H +#define SDARRAY_H + +#include +#include +#include +#include +#include + +typedef struct { + int n,m; + int size; + uchar *buf; + uint *lp; + uint *sl; + ushort *ss; + uint ss_len, sl_len; + uint *p; +} selectd2; + +typedef struct { + int n,m,d; + int size; + uchar *hi; + uint *low; + selectd2 *sd0,*sd1; + uint hi_len, low_len; + //uint lasti, lasts; +} selects3; + +int selects3_construct(selects3 *select, int n, uint *buf); +int selects3_select(selects3 *select, int i); +int selects3_rank(selects3 *select, int i); +int selects3_selectnext(selects3 *select, int i); + +void make___selecttbl(void); +int __setbit(uint *B, int i,int x); +int selectd2_save(selectd2 * s, FILE * fp); +int selects3_save(selects3 * s, FILE * fp); + +int selectd2_load(selectd2 * s, FILE * fp); +int selects3_load(selects3 * s, FILE * fp); + +void selectd2_free(selectd2 * s); +void 
selects3_free(selects3 * s); + + +#endif + diff --git a/src/static_bitsequence/static_bitsequence.cpp b/src/static_bitsequence/static_bitsequence.cpp new file mode 100644 index 0000000..ad07b84 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence.cpp @@ -0,0 +1,116 @@ +/* static_bitsequence.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "static_bitsequence.h" + +uint static_bitsequence::rank0(uint i) { + return i+1-rank1(i); +} + +uint static_bitsequence::rank1(uint i) { + if(i>=len) return (uint)-1; + if(ones==0) return 0; + if(ones==len) return i+1; + uint ini = 1; + uint fin = ones; + while(inii) return ini-1; + return ini; +} + +uint static_bitsequence::select0(uint i) { + if(i>len-ones) return -1; + if(i==0) return -1; + if(ones==0) return i-1; + uint ini = 0; + uint fin = len-1; + while(iniones) return -1; + if(i==0) return -1; + if(ones==len) return i-1; + uint ini = 0; + uint fin = len-1; + while(ini0; +} + +uint static_bitsequence::length() { + return len; +} + +uint static_bitsequence::count_one() { + return ones; +} + +uint static_bitsequence::count_zero() { + return len-ones; +} + +static_bitsequence * 
static_bitsequence::load(FILE * fp) { + uint r; + if(fread(&r,sizeof(uint),1,fp)!=1) return NULL; + fseek(fp,-1*sizeof(uint),SEEK_CUR); + switch(r) { + case RRR02_HDR: return static_bitsequence_rrr02::load(fp); + case BRW32_HDR: return static_bitsequence_brw32::load(fp); + case RRR02_LIGHT_HDR: return static_bitsequence_rrr02_light::load(fp); + case SDARRAY_HDR: return static_bitsequence_sdarray::load(fp); + } + return NULL; +} diff --git a/src/static_bitsequence/static_bitsequence.h b/src/static_bitsequence/static_bitsequence.h new file mode 100644 index 0000000..d8e3f18 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence.h @@ -0,0 +1,99 @@ +/* static_bitsequence.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_BITSEQUENCE_H +#define _STATIC_BITSEQUENCE_H + +#define RRR02_HDR 2 +#define BRW32_HDR 3 +#define RRR02_LIGHT_HDR 4 +#define SDARRAY_HDR 5 + +#include +#include + + +//using namespace std; + +/** Base class for static bitsequences, contains many abstract functions, so this can't + * be instantiated. It includes base implementations for rank0, select0 and select1 based + * on rank0. 
+ * + * @author Francisco Claude + */ +class static_bitsequence { + +public: + virtual ~static_bitsequence() {}; + + /** Returns the number of zeros until position i */ + virtual uint rank0(uint i); + + /** Returns the position of the i-th zero + * @return (uint)-1 if i=0, len if i>num_zeros or the position */ + virtual uint select0(uint i); + + /** Returns the number of ones until position i */ + virtual uint rank1(uint i); + + /** Returns the position of the i-th one + * @return (uint)-1 if i=0, len if i>num_ones or the position */ + virtual uint select1(uint i); + + virtual uint select_next1(uint i); + virtual uint select_next0(uint i); + + /** Returns the i-th bit */ + virtual bool access(uint i); + + /** Returns the length in bits of the bitmap */ + virtual uint length(); + + /** Returns how many ones are in the bitstring */ + virtual uint count_one(); + + /** Returns how many zeros are in the bitstring */ + virtual uint count_zero(); + + /** Returns the size of the structure in bytes */ + virtual uint size()=0; + + /** Stores the bitmap given a file pointer, return 0 in case of success */ + virtual int save(FILE * fp)=0; + + /** Reads a bitmap determining the type */ + static static_bitsequence * load(FILE * fp); + +protected: + /** Length of the bitstring */ + uint len; + /** Number of ones in the bitstring */ + uint ones; + +}; + +#include +#include +#include +#include +#include + +#endif /* _STATIC_BITSEQUENCE_H */ diff --git a/src/static_bitsequence/static_bitsequence_brw32.cpp b/src/static_bitsequence/static_bitsequence_brw32.cpp new file mode 100644 index 0000000..204ce57 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_brw32.cpp @@ -0,0 +1,361 @@ +/* static_bitsequence_brw32.cpp + Copyright (C) 2005, Rodrigo Gonzalez, all rights reserved. + + New RANK, SELECT, SELECT-NEXT and SPARSE RANK implementations. 
+ + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#include "static_bitsequence_brw32.h" +#include +#include +// #include +using std::cout; +using std::endl; + +///////////// +//Rank(B,i)// +///////////// +//_factor = 0 => s=W*lgn +//_factor = P => s=W*P +//Is interesting to notice +//factor=2 => overhead 50% +//factor=3 => overhead 33% +//factor=4 => overhead 25% +//factor=20=> overhead 5% + +static_bitsequence_brw32::static_bitsequence_brw32(){ + data=NULL; +// this->owner = true; + this->n=0; + this->factor=0; +} + +static_bitsequence_brw32::static_bitsequence_brw32( uint *bitarray, uint _n, uint _factor){ + /*cout << "*****" << endl; + cout << bitarray << endl; + cout << _n << endl; + cout << _factor << endl; */ + if(_factor==0) exit(-1); + data=new uint[_n/W+1]; + for(uint i=0;iowner = true; + this->n=_n; + uint lgn=bits(n-1); + this->factor=_factor; + if (_factor==0) this->factor=lgn; + else this->factor=_factor; + b=32; + s=b*this->factor; + integers = n/W+1; + BuildRank(); + this->len = n; + this->ones = rank1(n-1); +} + +static_bitsequence_brw32::~static_bitsequence_brw32() { + delete [] Rs; + delete [] data; +} + +//Metodo que realiza la busqueda d +void static_bitsequence_brw32::BuildRank(){ + uint num_sblock = n/s; + Rs = new uint[num_sblock+5];// +1 pues sumo la pos cero + for(uint 
i=0;in,sizeof(uint),1,f) != 1) return NULL; + ret->b=32; // b is a word + if (fread (&ret->factor,sizeof(uint),1,f) != 1) return NULL; + ret->s=ret->b*ret->factor; + uint aux=(ret->n+1)%W; + if (aux != 0) + ret->integers = (ret->n+1)/W+1; + else + ret->integers = (ret->n+1)/W; + ret->data = new uint[ret->n/W+1]; + if (!ret->data) return NULL; + if (fread (ret->data,sizeof(uint),ret->n/W+1,f) != ret->n/W+1) return NULL; + ret->Rs= new uint[ret->n/ret->s+1]; + if (!ret->Rs) return NULL; + if (fread (ret->Rs,sizeof(uint),ret->n/ret->s+1,f) != ret->n/ret->s+1) return NULL; + ret->len = ret->n; + ret->ones = ret->rank1(ret->n-1); + return ret; +} + +uint static_bitsequence_brw32::SpaceRequirementInBits() { + return uint_len(n,1)*sizeof(uint)*8+(n/s)*sizeof(uint)*8 +sizeof(static_bitsequence_brw32)*8; +} + +uint static_bitsequence_brw32::size() { + return sizeof(static_bitsequence_brw32)+SpaceRequirementInBits()/8; +} + +uint static_bitsequence_brw32::SpaceRequirement() { + return n/8+(n/s)*sizeof(uint)+sizeof(static_bitsequence_brw32); +} + +uint static_bitsequence_brw32::prev2(uint start) { + // returns the position of the previous 1 bit before and including start. + // tuned to 32 bit machine + + uint i = start >> 5; + int offset = (start % W); + uint answer = start; + uint val = data[i] << (Wminusone-offset); + + if (!val) { val = data[--i]; answer -= 1+offset; } + + while (!val) { val = data[--i]; answer -= W; } + + if (!(val & 0xFFFF0000)) { val <<= 16; answer -= 16; } + if (!(val & 0xFF000000)) { val <<= 8; answer -= 8; } + + while (!(val & 0x80000000)) { val <<= 1; answer--; } + return answer; +} + +uint static_bitsequence_brw32::prev(uint start) { + // returns the position of the previous 1 bit before and including start. 
+ // tuned to 32 bit machine + + uint i = start >> 5; + int offset = (start % W); + uint aux2 = data[i] & (-1u >> (31-offset)); + + if (aux2 > 0) { + if ((aux2&0xFF000000) > 0) return i*W+23+prev_tab[(aux2>>24)&0xFF]; + else if ((aux2&0xFF0000) > 0) return i*W+15+prev_tab[(aux2>>16)&0xFF]; + else if ((aux2&0xFF00) > 0) return i*W+7+prev_tab[(aux2>>8)&0xFF]; + else return i*W+prev_tab[aux2&0xFF]-1; + } + for (uint k=i-1;;k--) { + aux2=data[k]; + if (aux2 > 0) { + if ((aux2&0xFF000000) > 0) return k*W+23+prev_tab[(aux2>>24)&0xFF]; + else if ((aux2&0xFF0000) > 0) return k*W+15+prev_tab[(aux2>>16)&0xFF]; + else if ((aux2&0xFF00) > 0) return k*W+7+prev_tab[(aux2>>8)&0xFF]; + else return k*W+prev_tab[aux2&0xFF]-1; + } + } + return 0; +} + +uint static_bitsequence_brw32::next(uint k) { + uint count = k; + uint des,aux2; + des=count%W; + aux2= data[count/W] >> des; + if (aux2 > 0) { + if ((aux2&0xff) > 0) return count+select_tab[aux2&0xff]-1; + else if ((aux2&0xff00) > 0) return count+8+select_tab[(aux2>>8)&0xff]-1; + else if ((aux2&0xff0000) > 0) return count+16+select_tab[(aux2>>16)&0xff]-1; + else {return count+24+select_tab[(aux2>>24)&0xff]-1;} + } + + for (uint i=count/W+1;i 0) { + if ((aux2&0xff) > 0) return i*W+select_tab[aux2&0xff]-1; + else if ((aux2&0xff00) > 0) return i*W+8+select_tab[(aux2>>8)&0xff]-1; + else if ((aux2&0xff0000) > 0) return i*W+16+select_tab[(aux2>>16)&0xff]-1; + else {return i*W+24+select_tab[(aux2>>24)&0xff]-1;} + } + } + return n; +} + +uint static_bitsequence_brw32::select1(uint x) { + // returns i such that x=rank(i) && rank(i-1)ones) return (uint)(-1); + + //binary search over first level rank structure + uint l=0, r=n/s; + uint mid=(l+r)/2; + uint rankmid = Rs[mid]; + while (l<=r) { + if (rankmid integers) return n; + j = data[left]; + ones = popcount(j); + } + //sequential search using popcount over a char + left=left*b; + rankmid = popcount8(j); + if (rankmid < x) { + j=j>>8; + x-=rankmid; + left+=8; + rankmid = popcount8(j); + if 
(rankmid < x) { + j=j>>8; + x-=rankmid; + left+=8; + rankmid = popcount8(j); + if (rankmid < x) { + j=j>>8; + x-=rankmid; + left+=8; + } + } + } + + // then sequential search bit a bit + while (x>0) { + if (j&1) x--; + j=j>>1; + left++; + } + return left-1; +} + +uint static_bitsequence_brw32::select0(uint x) { + // returns i such that x=rank_0(i) && rank_0(i-1)n-ones) return (uint)(-1); + + //binary search over first level rank structure + if(x==0) return 0; + uint l=0, r=n/s; + uint mid=(l+r)/2; + uint rankmid = mid*factor*W-Rs[mid]; + while (l<=r) { + if (rankmid integers) return n; + j = data[left]; + zeros = W-popcount(j); + } + //sequential search using popcount over a char + left=left*b; + rankmid = 8-popcount8(j); + if (rankmid < x) { + j=j>>8; + x-=rankmid; + left+=8; + rankmid = 8-popcount8(j); + if (rankmid < x) { + j=j>>8; + x-=rankmid; + left+=8; + rankmid = 8-popcount8(j); + if (rankmid < x) { + j=j>>8; + x-=rankmid; + left+=8; + } + } + } + + // then sequential search bit a bit + while (x>0) { + if (j%2 == 0 ) x--; + j=j>>1; + left++; + } + left--; + if (left > n) return n; + else return left; +} diff --git a/src/static_bitsequence/static_bitsequence_brw32.h b/src/static_bitsequence/static_bitsequence_brw32.h new file mode 100644 index 0000000..64fcf1b --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_brw32.h @@ -0,0 +1,78 @@ +/* static_bitsequence_brw32.h + Copyright (C) 2005, Rodrigo Gonzalez, all rights reserved. + + New RANK, SELECT, SELECT-NEXT and SPARSE RANK implementations. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#ifndef _STATIC_BITSEQUENCE_BRW32_H +#define _STATIC_BITSEQUENCE_BRW32_H + +#include +#include +///////////// +//Rank(B,i)// +///////////// +//_factor = 0 => s=W*lgn +//_factor = P => s=W*P +//Is interesting to notice +//factor=2 => overhead 50% +//factor=3 => overhead 33% +//factor=4 => overhead 25% +//factor=20=> overhead 5% + +/** Implementation of Rodrigo Gonzalez et al. practical rank/select solution [1]. + * The interface was adapted. + * + * [1] Rodrigo Gonzalez, Szymon Grabowski, Veli Makinen, and Gonzalo Navarro. + * Practical Implementation of Rank and Select Queries. WEA05. + * + * @author Rodrigo Gonzalez + */ +class static_bitsequence_brw32 : public static_bitsequence { +private: + uint *data; + //bool owner; + uint n,integers; + uint factor,b,s; + uint *Rs; //superblock array + + uint BuildRankSub(uint ini,uint fin); //uso interno para contruir el indice rank + void BuildRank(); //crea indice para rank + static_bitsequence_brw32(); + +public: + static_bitsequence_brw32(uint *bitarray, uint n, uint factor); + ~static_bitsequence_brw32(); //destructor + virtual bool access(uint i); + virtual uint rank1(uint i); //Nivel 1 bin, nivel 2 sec-pop y nivel 3 sec-bit + + uint prev(uint start); // gives the largest index i<=start such that IsBitSet(i)=true + uint prev2(uint start); // gives the largest index i<=start such that IsBitSet(i)=true + uint next(uint start); // gives the smallest index i>=start such that IsBitSet(i)=true + virtual uint select0(uint x); // gives the position of the x:th 1. + virtual uint select1(uint x); // gives the position of the x:th 1. 
+ uint SpaceRequirementInBits(); + uint SpaceRequirement(); + virtual uint size(); + + /*load-save functions*/ + virtual int save(FILE *f); + static static_bitsequence_brw32 * load(FILE * fp); +}; + +#endif diff --git a/src/static_bitsequence/static_bitsequence_builder.h b/src/static_bitsequence/static_bitsequence_builder.h new file mode 100644 index 0000000..72135a6 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_builder.h @@ -0,0 +1,37 @@ +/* static_bitsequence_builder.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_BITSEQUENCE_BUILDER_H +#define _STATIC_BITSEQUENCE_BUILDER_H + +class static_bitsequence_builder { + public: + virtual ~static_bitsequence_builder() {} + /** Builds a static_bitsequence for the bitmap bitsequence of length len */ + virtual static_bitsequence * build(uint * bitsequence, uint len)=0; +}; + +#include +#include +#include +#include + +#endif /* _STATIC_BITSEQUENCE_BUILDER_H */ diff --git a/src/static_bitsequence/static_bitsequence_builder_brw32.cpp b/src/static_bitsequence/static_bitsequence_builder_brw32.cpp new file mode 100644 index 0000000..14e8057 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_builder_brw32.cpp @@ -0,0 +1,30 @@ +/* static_bitsequence_builder_brw32.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_brw32 definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +/** Constructor: stores sampling in sample_rate so that build() can hand it to every bitmap it creates. */ static_bitsequence_builder_brw32::static_bitsequence_builder_brw32(uint sampling) { + this->sample_rate=sampling; +} + +/** Allocates with new and returns a static_bitsequence_brw32 constructed from bitsequence, len and the stored sample_rate. NOTE(review): nothing here frees the returned object, so ownership presumably passes to the caller - confirm. */ static_bitsequence * static_bitsequence_builder_brw32::build(uint * bitsequence, uint len) { + return new static_bitsequence_brw32(bitsequence,len,this->sample_rate); +} diff --git a/src/static_bitsequence/static_bitsequence_builder_brw32.h b/src/static_bitsequence/static_bitsequence_builder_brw32.h new file mode 100644 index 0000000..a2f9308 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_builder_brw32.h @@ -0,0 +1,40 @@ +/* static_bitsequence_builder_brw32.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_brw32 definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_BITSEQUENCE_BUILDER_BRW32_H +#define _STATIC_BITSEQUENCE_BUILDER_BRW32_H + +#include +#include +#include + +/** Concrete static_bitsequence_builder for the brw32 bitmap: it keeps one sample rate, fixed at construction, and uses it for every bitmap built. */ class static_bitsequence_builder_brw32 : public static_bitsequence_builder { + public: + /** Defines the sample rate used to build the bitmaps (brw32) */ + static_bitsequence_builder_brw32(uint sampling); + virtual ~static_bitsequence_builder_brw32() {} + /** Builds a brw32 bitmap for bitsequence of length len with the stored sample rate; the implementation in this patch returns an object allocated with new. */ virtual static_bitsequence * build(uint * bitsequence, uint len); + + protected: + /** Sample rate received by the constructor and forwarded by build(). */ uint sample_rate; +}; + +#endif /* _STATIC_BITSEQUENCE_BUILDER_BRW32_H */ diff --git a/src/static_bitsequence/static_bitsequence_builder_rrr02.cpp b/src/static_bitsequence/static_bitsequence_builder_rrr02.cpp new file mode 100644 index 0000000..36ddc31 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_builder_rrr02.cpp @@ -0,0 +1,30 @@ +/* static_bitsequence_builder_rrr02.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_rrr02 definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +/** Constructor: stores sampling in sample_rate so that build() can hand it to every bitmap it creates. */ static_bitsequence_builder_rrr02::static_bitsequence_builder_rrr02(uint sampling) { + sample_rate=sampling; +} + +/** Allocates with new and returns a static_bitsequence_rrr02 constructed from bitsequence, len and the stored sample_rate. NOTE(review): nothing here frees the returned object, so ownership presumably passes to the caller - confirm. */ static_bitsequence * static_bitsequence_builder_rrr02::build(uint * bitsequence, uint len) { + return new static_bitsequence_rrr02(bitsequence,len,sample_rate); +} diff --git a/src/static_bitsequence/static_bitsequence_builder_rrr02.h b/src/static_bitsequence/static_bitsequence_builder_rrr02.h new file mode 100644 index 0000000..1ebf5a1 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_builder_rrr02.h @@ -0,0 +1,40 @@ +/* static_bitsequence_builder_rrr02.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_rrr02 definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_BITSEQUENCE_BUILDER_RRR02_H +#define _STATIC_BITSEQUENCE_BUILDER_RRR02_H + +#include +#include +#include + +/** Concrete static_bitsequence_builder for the rrr02 bitmap: it keeps one sample rate, fixed at construction, and uses it for every bitmap built. */ class static_bitsequence_builder_rrr02 : public static_bitsequence_builder { + public: + /** Defines the sample rate used to build the bitmaps (rrr02) */ + static_bitsequence_builder_rrr02(uint sampling); + virtual ~static_bitsequence_builder_rrr02() {} + /** Builds an rrr02 bitmap for bitsequence of length len with the stored sample rate; the implementation in this patch returns an object allocated with new. */ virtual static_bitsequence * build(uint * bitsequence, uint len); + + protected: + /** Sample rate received by the constructor and forwarded by build(). */ uint sample_rate; +}; + +#endif /* _STATIC_BITSEQUENCE_BUILDER_RRR02_H */ diff --git a/src/static_bitsequence/static_bitsequence_builder_rrr02_light.cpp b/src/static_bitsequence/static_bitsequence_builder_rrr02_light.cpp new file mode 100644 index 0000000..6aee2be --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_builder_rrr02_light.cpp @@ -0,0 +1,30 @@ +/* static_bitsequence_builder_rrr02_light.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_rrr02_light definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +/** Constructor: stores sampling in sample_rate so that build() can hand it to every bitmap it creates. */ static_bitsequence_builder_rrr02_light::static_bitsequence_builder_rrr02_light(uint sampling) { + sample_rate=sampling; +} + +/** Allocates with new and returns a static_bitsequence_rrr02_light constructed from bitsequence, len and the stored sample_rate. NOTE(review): nothing here frees the returned object, so ownership presumably passes to the caller - confirm. */ static_bitsequence * static_bitsequence_builder_rrr02_light::build(uint * bitsequence, uint len) { + return new static_bitsequence_rrr02_light(bitsequence,len,sample_rate); +} diff --git a/src/static_bitsequence/static_bitsequence_builder_rrr02_light.h b/src/static_bitsequence/static_bitsequence_builder_rrr02_light.h new file mode 100644 index 0000000..3bbf232 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_builder_rrr02_light.h @@ -0,0 +1,40 @@ +/* static_bitsequence_builder_rrr02_light.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_rrr02_light definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_BITSEQUENCE_BUILDER_RRR02_LIGHT_H +#define _STATIC_BITSEQUENCE_BUILDER_RRR02_LIGHT_H + +#include +#include +#include + +/** Concrete static_bitsequence_builder for the rrr02_light bitmap: it keeps one sample rate, fixed at construction, and uses it for every bitmap built. */ class static_bitsequence_builder_rrr02_light : public static_bitsequence_builder { + public: + /** Defines the sample rate used to build the bitmaps (rrr02_light) */ + static_bitsequence_builder_rrr02_light(uint sampling); + virtual ~static_bitsequence_builder_rrr02_light() {} + /** Builds an rrr02_light bitmap for bitsequence of length len with the stored sample rate; the implementation in this patch returns an object allocated with new. */ virtual static_bitsequence * build(uint * bitsequence, uint len); + + protected: + /** Sample rate received by the constructor and forwarded by build(). */ uint sample_rate; +}; + +#endif /* _STATIC_BITSEQUENCE_BUILDER_RRR02_LIGHT_H */ diff --git a/src/static_bitsequence/static_bitsequence_builder_sdarray.cpp b/src/static_bitsequence/static_bitsequence_builder_sdarray.cpp new file mode 100644 index 0000000..da9b233 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_builder_sdarray.cpp @@ -0,0 +1,7 @@ + +#include + +/** Allocates with new and returns a static_bitsequence_sdarray constructed from buff and len; this builder takes no sampling parameter. */ static_bitsequence * static_bitsequence_builder_sdarray::build(uint * buff, uint len) { + return new static_bitsequence_sdarray(buff,len); +} + diff --git a/src/static_bitsequence/static_bitsequence_builder_sdarray.h b/src/static_bitsequence/static_bitsequence_builder_sdarray.h new file mode 100644 index 0000000..f16da32 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_builder_sdarray.h @@ -0,0 +1,36 @@ +/* static_bitsequence_builder_sdarray.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_sdarray definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_BITSEQUENCE_BUILDER_SDARRAY_H +#define _STATIC_BITSEQUENCE_BUILDER_SDARRAY_H + +#include +#include + +/** Concrete static_bitsequence_builder for the sdarray bitmap. It holds no state: the constructor and destructor are empty and build() takes no tuning parameter. */ class static_bitsequence_builder_sdarray : public static_bitsequence_builder { + public: + static_bitsequence_builder_sdarray() {} + virtual ~static_bitsequence_builder_sdarray() {} + /** Builds a static_bitsequence for the bitmap bitsequence of length len; the implementation in this patch returns a static_bitsequence_sdarray allocated with new. */ + virtual static_bitsequence * build(uint * bitsequence, uint len); +}; + +#endif /* _STATIC_BITSEQUENCE_BUILDER_SDARRAY_H */ diff --git a/src/static_bitsequence/static_bitsequence_naive.cpp b/src/static_bitsequence/static_bitsequence_naive.cpp new file mode 100644 index 0000000..d9d0a0c --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_naive.cpp @@ -0,0 +1,70 @@ +/* static_bitsequence_naive.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Naive Bitsequence - don't use, only for testing + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "static_bitsequence_naive.h" + +static_bitsequence_naive::static_bitsequence_naive(uint * bitseq, uint len) { + this->len = len; + this->bitseq = new uint[len/W+(len%W>0)]; + for(uint i=0;i0);i++) + this->bitseq[i] = bitseq[i]; + uint ret = 0; + for(uint k=0;kones = ret; +} + +static_bitsequence_naive::~static_bitsequence_naive() { + delete [] bitseq; +} + +uint static_bitsequence_naive::rank1(uint i) { + if(i>=len) return ones; + uint ret = 0; + for(uint k=0;k<=i;k++) + if(bitget(bitseq,k)) + ret++; + return ret; +} + +uint static_bitsequence_naive::select1(uint i) { + if(i==0) return (uint)-1; + if(i>ones) return len; + uint cnt = 0; + for(uint k=0;k + +/** Class used for testing, should not be used with long bitmaps + * @author Francisco Claude + */ +class static_bitsequence_naive: public static_bitsequence { +public: + /** Builds a naive bitsequence, receives the bitmap and the length + * in bits + */ + static_bitsequence_naive(uint * bitseq, uint len); + + virtual ~static_bitsequence_naive(); + + /** Returns the number of ones until position i */ + virtual uint rank1(uint i); + + /** Returns the position of the i-th one + * @return (uint)-1 if i=0, len if i>num_ones or the position */ + virtual uint select1(uint i); + + /** Returns the i-th bit */ + virtual bool access(uint i); + + /** Returns the size of the structure in bytes */ + virtual uint size(); + + /** - Not implemented - */ + virtual int save(FILE * fp); + + +protected: + uint * bitseq; +}; + +#endif /* _STATIC_BITSEQUENCE_NAIVE_H */ + diff --git a/src/static_bitsequence/static_bitsequence_rrr02.cpp b/src/static_bitsequence/static_bitsequence_rrr02.cpp new file mode 100644 index 0000000..18396fa --- /dev/null +++ 
b/src/static_bitsequence/static_bitsequence_rrr02.cpp @@ -0,0 +1,349 @@ +/* static_bitsequence_rrr02.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_rrr02 definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +using std::min; +using std::max; +table_offset * static_bitsequence_rrr02::E = NULL; + +static_bitsequence_rrr02::static_bitsequence_rrr02() { + ones=0; + len=0; + if(E==NULL) E = new table_offset(BLOCK_SIZE); + E->use(); + C = NULL; + O = NULL; + C_sampling = NULL; + O_pos = NULL; + sample_rate = DEFAULT_SAMPLING; + C_len = O_len = C_sampling_len = O_pos_len = 0; + O_bits_len = C_sampling_field_bits = O_pos_field_bits = 0; +} + +static_bitsequence_rrr02::static_bitsequence_rrr02(uint * bitseq, uint len, uint sample_rate) { + ones = 0; + this->len = len; + if(E==NULL) E = new table_offset(BLOCK_SIZE); + E->use(); + // Table C + C_len = len/BLOCK_SIZE + (len%BLOCK_SIZE!=0); + C_field_bits = bits(BLOCK_SIZE); + C = new uint[uint_len(C_len,C_field_bits)]; + for(uint i=0;iget_log2binomial(BLOCK_SIZE,value); + } + // Table O + O_len = uint_len(1,O_bits_len); + O = new uint[O_len]; + for(uint i=0;iget_log2binomial(BLOCK_SIZE,popcount(value))-1,E->compute_offset((ushort)value)); + O_pos += 
E->get_log2binomial(BLOCK_SIZE,popcount(value)); + } + C_sampling = NULL; + this->O_pos = NULL; + + create_sampling(sample_rate); +} + +void static_bitsequence_rrr02::create_sampling(uint sample_rate) { + this->sample_rate = sample_rate; +/* for(uint i=0;iget_log2binomial(BLOCK_SIZE,get_field(C,C_field_bits,i)); + }*/ + // Sampling for C + C_sampling_len = C_len/sample_rate+2; + C_sampling_field_bits = bits(ones); + if(C_sampling!=NULL) delete [] C_sampling; + C_sampling = new uint[max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits))]; + for(uint i=0;iget_log2binomial(BLOCK_SIZE,get_field(C,C_field_bits,i)); + } +} + +bool static_bitsequence_rrr02::access(uint i) { + uint nearest_sampled_value = i/BLOCK_SIZE/sample_rate; + uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value); + uint pos = i/BLOCK_SIZE; + assert(pos<=C_len); + for(uint k=nearest_sampled_value*sample_rate;kget_log2binomial(BLOCK_SIZE,aux); + } + uint c = get_field(C,C_field_bits,pos); + return ((1<<(i%BLOCK_SIZE))&E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE,c)-1)))!=0; +} + +uint static_bitsequence_rrr02::rank0(uint i) { + if(i+1==0) return 0; + return 1+i-rank1(i); +} + +uint static_bitsequence_rrr02::rank1(uint i) { + if(i+1==0) return 0; + uint nearest_sampled_value = i/BLOCK_SIZE/sample_rate; + uint sum = get_field(C_sampling,C_sampling_field_bits,nearest_sampled_value); + uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value); + uint pos = i/BLOCK_SIZE; + uint k=nearest_sampled_value*sample_rate; + if(k%2==1 && kget_log2binomial(BLOCK_SIZE,aux); + k++; + } + uchar * a = (uchar *)C; + uint mask = 0x0F; + a += k/2; + while(k<(uint)max(0,(int)pos-1)) { + assert(((*a)&mask)==get_field(C,C_field_bits,k)); + assert((*a)/16==get_field(C,C_field_bits,k+1)); + sum += ((*a)&mask)+(*a)/16; + pos_O += E->get_log2binomial(BLOCK_SIZE,((*a)&mask))+E->get_log2binomial(BLOCK_SIZE,((*a)/16)); + a++; + k+=2; + } + 
if(kget_log2binomial(BLOCK_SIZE,aux); + k++; + } + uint c = get_field(C,C_field_bits,pos); + sum += popcount(((2<<(i%BLOCK_SIZE))-1) & E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE,c)-1))); + return sum; +} + +uint static_bitsequence_rrr02::select0(uint i) { + if(i==0) return (uint)-1; + if(i>len-ones) return (uint)-1; + // Search over partial sums + uint start=0; + uint end=C_sampling_len-1; + uint med, acc=0, pos; + while(start=i) break; + pos_O += E->get_log2binomial(BLOCK_SIZE,s); + acc += BLOCK_SIZE-s; + } + pos = (pos)*BLOCK_SIZE; + // Search inside the block + + while(accget_log2binomial(BLOCK_SIZE,s); + uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1)); + pos_O = new_posO; + new_posO = 0; + while(accones) return -1; + // Search over partial sums + uint start=0; + uint end=C_sampling_len-1; + uint med, acc=0, pos; + while(start=i) break; + pos_O += E->get_log2binomial(BLOCK_SIZE,s); + acc += s; + } + pos = (pos)*BLOCK_SIZE; + //cout << "pos=" << pos << endl; + // Search inside the block + while(accget_log2binomial(BLOCK_SIZE,s); + uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1)); + pos_O = new_posO; + new_posO = 0; + while(accsize() << endl;*/ + uint sum = sizeof(static_bitsequence_rrr02); + sum += uint_len(C_len,C_field_bits)*sizeof(uint); + sum += O_len*sizeof(uint); + sum += uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint); + sum += uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint); + //sum += E->size(); + return sum; +} + +static_bitsequence_rrr02::~static_bitsequence_rrr02() { + if(C!=NULL) delete [] C; + if(O!=NULL) delete [] O; + if(C_sampling!=NULL) delete [] C_sampling; + if(O_pos!=NULL) delete [] O_pos; + E = E->unuse(); +} + +int static_bitsequence_rrr02::save(FILE * fp) { + uint wr = RRR02_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&ones,sizeof(uint),1,fp); + wr += fwrite(&C_len,sizeof(uint),1,fp); + wr += 
fwrite(&C_field_bits,sizeof(uint),1,fp); + wr += fwrite(&O_len,sizeof(uint),1,fp); + wr += fwrite(&O_bits_len,sizeof(uint),1,fp); + wr += fwrite(&sample_rate,sizeof(uint),1,fp); + if(wr!=8) return -1; + wr = fwrite(C,sizeof(uint),uint_len(C_len,C_field_bits),fp); + if(wr!=uint_len(C_len,C_field_bits)) return -1; + wr = fwrite(O,sizeof(uint),O_len,fp); + if(wr!=O_len) return -1; + return 0; +} + +static_bitsequence_rrr02 * static_bitsequence_rrr02::load(FILE * fp) { + static_bitsequence_rrr02 * ret = new static_bitsequence_rrr02(); + uint rd = 0, type; + rd += fread(&type,sizeof(uint),1,fp); + rd += fread(&ret->len,sizeof(uint),1,fp); + rd += fread(&ret->ones,sizeof(uint),1,fp); + rd += fread(&ret->C_len,sizeof(uint),1,fp); + rd += fread(&ret->C_field_bits,sizeof(uint),1,fp); + rd += fread(&ret->O_len,sizeof(uint),1,fp); + rd += fread(&ret->O_bits_len,sizeof(uint),1,fp); + rd += fread(&ret->sample_rate,sizeof(uint),1,fp); + if(rd!=8 || type!=RRR02_HDR) { + delete ret; + return NULL; + } + ret->C = new uint[uint_len(ret->C_len,ret->C_field_bits)]; + rd = fread(ret->C,sizeof(uint),uint_len(ret->C_len,ret->C_field_bits),fp); + if(rd!=uint_len(ret->C_len,ret->C_field_bits)) { + ret->C=NULL; + delete ret; + return NULL; + } + ret->O = new uint[ret->O_len]; + rd = fread(ret->O,sizeof(uint),ret->O_len,fp); + if(rd!=ret->O_len) { + ret->O=NULL; + delete ret; + return NULL; + } + ret->create_sampling(ret->sample_rate); + return ret; +} diff --git a/src/static_bitsequence/static_bitsequence_rrr02.h b/src/static_bitsequence/static_bitsequence_rrr02.h new file mode 100644 index 0000000..e50d273 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_rrr02.h @@ -0,0 +1,108 @@ +/* static_bitsequence_rrr02.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * RRR02 Bitsequence - + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#ifndef _STATIC_BITSEQUENCE_RRR02_H +#define _STATIC_BITSEQUENCE_RRR02_H + +#define BLOCK_SIZE 15 +#define DEFAULT_SAMPLING 32 + +#include +#include +#include +#include + +//using namespace std; + +/** Implementation of Raman, Raman and Rao's [1] proposal for rank/select capable + * data structures, it achieves space nH_0, O(sample_rate) time for rank and O(log len) + * for select. The practical implementation is based on [2] + * + * [1] R. Raman, V. Raman and S. Rao. Succinct indexable dictionaries with applications + * to encoding $k$-ary trees and multisets. SODA02. + * [2] F. Claude and G. Navarro. Practical Rank/Select over Arbitrary Sequences. SPIRE08. 
+ * + * @author Francisco Claude + */ +class static_bitsequence_rrr02: public static_bitsequence { +public: + /** Builds the structure for the bitmap bitseq of length len and then creates the sampling with sample_rate (DEFAULT_SAMPLING when omitted). */ static_bitsequence_rrr02(uint * bitseq, uint len, uint sample_rate=DEFAULT_SAMPLING); + virtual ~static_bitsequence_rrr02(); + + /** Returns the number of zeros up to and including position i; the implementation in this patch returns 0 when i is (uint)-1 */ + virtual uint rank0(uint i); + + /** Returns the number of ones up to and including position i; the implementation in this patch returns 0 when i is (uint)-1 */ + virtual uint rank1(uint i); + + /** Returns the position of the i-th zero + * @return (uint)-1 if i=0 or i>num_zeros, otherwise the position. NOTE(review): this comment used to promise len for i>num_zeros, which is what the rrr02_light variant in this patch returns; confirm which contract callers rely on. */ + virtual uint select0(uint i); + + /** Returns the position of the i-th one + * @return (uint)-1 if i=0, otherwise the position. NOTE(review): for i>num_ones this comment used to promise len, but the rrr02 implementation in this patch appears to return -1, that is (uint)-1; confirm. */ + virtual uint select1(uint i); + + /** Returns the i-th bit */ + virtual bool access(uint i); + + /** Returns the size of the structure in bytes */ + virtual uint size(); + + /** Stores the bitmap given a file pointer, return 0 in case of success and -1 when a write comes up short. The samplings are not written; load() rebuilds them with create_sampling(). */ + virtual int save(FILE * fp); + + /** Reads the bitmap from a file pointer, returns NULL in case of error */ + static static_bitsequence_rrr02 * load(FILE * fp); + + /** Creates a new sampling for the queries using the given rate, which is also stored in sample_rate */ + void create_sampling(uint sampling_rate); + + /** Frees the space required by the table E, which is static and global + * to all instances. NOTE(review): E is not reset to NULL afterwards, while the constructors only allocate a table when E==NULL, so calling this while instances exist or before creating new ones looks unsafe - confirm. 
+ */ + static void delete_E() { + delete E; + } + + +protected: + /** Creates an empty structure with all tables set to NULL; load() uses it before filling the fields from the file. */ static_bitsequence_rrr02(); + /** Classes and offsets */ + uint *C, *O; + /** C_len is the number of blocks, i.e. fields stored in C; O_len is the length of O in uints */ + uint C_len, O_len; + /** Bits required per field for C and in total for O */ + uint C_field_bits, O_bits_len; + /** C and O samplings */ + uint *C_sampling, *O_pos; + /** Length of the samplings */ + uint C_sampling_len,O_pos_len; + /** Length in bits per field */ + uint C_sampling_field_bits,O_pos_field_bits; + /** Sample rate */ + uint sample_rate; + + /** Offset table shared by all instances; the constructors allocate it as table_offset(BLOCK_SIZE) when it is NULL. */ static table_offset * E; +}; + +#endif /* _STATIC_BITSEQUENCE_RRR02_H */ diff --git a/src/static_bitsequence/static_bitsequence_rrr02_light.cpp b/src/static_bitsequence/static_bitsequence_rrr02_light.cpp new file mode 100644 index 0000000..793b7af --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_rrr02_light.cpp @@ -0,0 +1,374 @@ +/* static_bitsequence_rrr02_light.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_rrr02_light definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +using std::min; +using std::max; +#define VARS_NEEDED uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0);\ +uint C_field_bits = bits(BLOCK_SIZE_LIGHT);\ +uint O_len = uint_len(1,O_bits_len);\ +uint C_sampling_len = C_len/sample_rate+2;\ +uint C_sampling_field_bits = bits(ones);\ +uint O_pos_len = C_len/sample_rate+1;\ +uint O_pos_field_bits = bits(O_bits_len); + + +table_offset * static_bitsequence_rrr02_light::E = NULL; + +static_bitsequence_rrr02_light::static_bitsequence_rrr02_light() { + ones=0; + len=0; + if(E==NULL) E = new table_offset(BLOCK_SIZE_LIGHT); + E->use(); + C = NULL; + O = NULL; + C_sampling = NULL; + O_pos = NULL; + sample_rate = DEFAULT_SAMPLING_LIGHT; + O_bits_len = 0; +} + +static_bitsequence_rrr02_light::static_bitsequence_rrr02_light(uint * bitseq, uint len, uint sample_rate) { + ones = 0; + this->len = len; + if(E==NULL) E = new table_offset(BLOCK_SIZE_LIGHT); + E->use(); + // Table C + uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + C = new uint[uint_len(C_len,C_field_bits)]; + for(uint i=0;iget_log2binomial(BLOCK_SIZE_LIGHT,value); + } + // Table O + uint O_len = uint_len(1,O_bits_len); + O = new uint[O_len]; + for(uint i=0;iget_log2binomial(BLOCK_SIZE_LIGHT,popcount(value))-1,E->compute_offset((ushort)value)); + O_pos += E->get_log2binomial(BLOCK_SIZE_LIGHT,popcount(value)); + } + C_sampling = NULL; + this->O_pos = NULL; + + create_sampling(sample_rate); +} + +void static_bitsequence_rrr02_light::create_sampling(uint sample_rate) { + this->sample_rate = sample_rate; +/* for(uint i=0;iget_log2binomial(BLOCK_SIZE_LIGHT,get_field(C,C_field_bits,i)); + }*/ + // Sampling for C + uint C_len = len/BLOCK_SIZE_LIGHT + 
(len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint C_sampling_len = C_len/sample_rate+2; + uint C_sampling_field_bits = bits(ones); + if(C_sampling!=NULL) delete [] C_sampling; + C_sampling = new uint[max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits))]; + for(uint i=0;iget_log2binomial(BLOCK_SIZE_LIGHT,get_field(C,C_field_bits,i)); + } +} + +bool static_bitsequence_rrr02_light::access(uint i) { + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint O_pos_field_bits = bits(O_bits_len); + uint nearest_sampled_value = i/BLOCK_SIZE_LIGHT/sample_rate; + uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value); + uint pos = i/BLOCK_SIZE_LIGHT; + for(uint k=nearest_sampled_value*sample_rate;kget_log2binomial(BLOCK_SIZE_LIGHT,aux); + } + uint c = get_field(C,C_field_bits,pos); + return ((1<<(i%BLOCK_SIZE_LIGHT))&E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE_LIGHT,c)-1)))!=0; +} + +uint static_bitsequence_rrr02_light::rank0(uint i) { + if(i+1==0) return 0; + return 1+i-rank1(i); +} + +uint static_bitsequence_rrr02_light::rank1(uint i) { + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint C_sampling_field_bits = bits(ones); + uint O_pos_field_bits = bits(O_bits_len); + if(i+1==0) return 0; + uint nearest_sampled_value = i/BLOCK_SIZE_LIGHT/sample_rate; + uint sum = get_field(C_sampling,C_sampling_field_bits,nearest_sampled_value); + uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value); + uint pos = i/BLOCK_SIZE_LIGHT; + uint k=nearest_sampled_value*sample_rate; + if(k%2==1 && kget_log2binomial(BLOCK_SIZE_LIGHT,aux); + k++; + } + uchar * a = (uchar *)C; + uint mask = 0x0F; + a += k/2; + while(k<(uint)max(0,(int)pos-1)) { + assert(((*a)&mask)==get_field(C,C_field_bits,k)); + assert((*a)/16==get_field(C,C_field_bits,k+1)); + sum += ((*a)&mask)+(*a)/16; + pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,((*a)&mask))+E->get_log2binomial(BLOCK_SIZE_LIGHT,((*a)/16)); + a++; + k+=2; + } + 
if(kget_log2binomial(BLOCK_SIZE_LIGHT,aux); + k++; + } + uint c = get_field(C,C_field_bits,pos); + sum += popcount(((2<<(i%BLOCK_SIZE_LIGHT))-1) & E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE_LIGHT,c)-1))); + return sum; +} + +uint static_bitsequence_rrr02_light::select0(uint i) { + uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint C_sampling_len = C_len/sample_rate+2; + uint C_sampling_field_bits = bits(ones); + uint O_pos_field_bits = bits(O_bits_len); + if(i==0) return -1; + if(i>len-ones) return len; + // Search over partial sums + uint start=0; + uint end=C_sampling_len-1; + uint med, acc=0, pos; + while(start=i) break; + pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,s); + acc += BLOCK_SIZE_LIGHT-s; + } + pos = (pos)*BLOCK_SIZE_LIGHT; + // Search inside the block + + while(accget_log2binomial(BLOCK_SIZE_LIGHT,s); + uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1)); + pos_O = new_posO; + new_posO = 0; + while(accones) return len; + // Search over partial sums + uint start=0; + uint end=C_sampling_len-1; + uint med, acc=0, pos; + while(start=i) break; + pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,s); + acc += s; + } + pos = (pos)*BLOCK_SIZE_LIGHT; + //cout << "pos=" << pos << endl; + // Search inside the block + while(accget_log2binomial(BLOCK_SIZE_LIGHT,s); + uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1)); + pos_O = new_posO; + new_posO = 0; + while(accsize() << endl;*/ + uint sum = sizeof(uint)*8;//sizeof(static_bitsequence_rrr02_light); + sum += uint_len(C_len,C_field_bits)*sizeof(uint); + sum += O_len*sizeof(uint); + sum += uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint); + sum += uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint); + //sum += E->size(); + return sum; +} + +static_bitsequence_rrr02_light::~static_bitsequence_rrr02_light() { + if(C!=NULL) delete [] C; + if(O!=NULL) delete [] O; + if(C_sampling!=NULL) 
delete [] C_sampling; + if(O_pos!=NULL) delete [] O_pos; + E = E->unuse(); +} + +int static_bitsequence_rrr02_light::save(FILE * fp) { + uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint O_len = uint_len(1,O_bits_len); + uint wr = RRR02_LIGHT_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&ones,sizeof(uint),1,fp); + wr += fwrite(&O_bits_len,sizeof(uint),1,fp); + wr += fwrite(&sample_rate,sizeof(uint),1,fp); + if(wr!=5) return -1; + wr = fwrite(C,sizeof(uint),uint_len(C_len,C_field_bits),fp); + if(wr!=uint_len(C_len,C_field_bits)) return -1; + wr = fwrite(O,sizeof(uint),O_len,fp); + if(wr!=O_len) return -1; + return 0; +} + +static_bitsequence_rrr02_light * static_bitsequence_rrr02_light::load(FILE * fp) { + static_bitsequence_rrr02_light * ret = new static_bitsequence_rrr02_light(); + uint rd = 0, type; + rd += fread(&type,sizeof(uint),1,fp); + rd += fread(&ret->len,sizeof(uint),1,fp); + rd += fread(&ret->ones,sizeof(uint),1,fp); + rd += fread(&ret->O_bits_len,sizeof(uint),1,fp); + rd += fread(&ret->sample_rate,sizeof(uint),1,fp); + uint C_len = ret->len/BLOCK_SIZE_LIGHT + (ret->len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint O_len = uint_len(1,ret->O_bits_len); + if(rd!=5 || type!=RRR02_LIGHT_HDR) { + delete ret; + return NULL; + } + ret->C = new uint[uint_len(C_len,C_field_bits)]; + rd = fread(ret->C,sizeof(uint),uint_len(C_len,C_field_bits),fp); + if(rd!=uint_len(C_len,C_field_bits)) { + ret->C=NULL; + delete ret; + return NULL; + } + ret->O = new uint[O_len]; + rd = fread(ret->O,sizeof(uint),O_len,fp); + if(rd!=O_len) { + ret->O=NULL; + delete ret; + return NULL; + } + ret->create_sampling(ret->sample_rate); + return ret; +} diff --git a/src/static_bitsequence/static_bitsequence_rrr02_light.h b/src/static_bitsequence/static_bitsequence_rrr02_light.h new file mode 100644 index 0000000..2a5d57f --- /dev/null +++ 
b/src/static_bitsequence/static_bitsequence_rrr02_light.h @@ -0,0 +1,100 @@ +/* static_bitsequence_rrr02_light.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * RRR02 Bitsequence - light version + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#ifndef _STATIC_BITSEQUENCE_RRR02_LIGHT_H +#define _STATIC_BITSEQUENCE_RRR02_LIGHT_H + +#define BLOCK_SIZE_LIGHT 15 +#define DEFAULT_SAMPLING_LIGHT 32 + +#include +#include +#include +#include + +//using namespace std; + +/** Implementation of Raman, Raman and Rao's [1] proposal for rank/select capable + * data structures, it achieves space nH_0, O(sample_rate) time for rank and O(log len) + * for select. The practial implementation is based on [2] + * + * [1] R. Raman, V. Raman and S. Rao. Succinct indexable dictionaries with applications + * to encoding $k$-ary trees and multisets. SODA02. + * [2] F. Claude and G. Navarro. Practical Rank/Select over Arbitrary Sequences. SPIRE08. 
+ * + * @author Francisco Claude + */ +class static_bitsequence_rrr02_light: public static_bitsequence { +public: + static_bitsequence_rrr02_light(uint * bitseq, uint len, uint sample_rate=DEFAULT_SAMPLING_LIGHT); + virtual ~static_bitsequence_rrr02_light(); + + /** Returns the number of zeros until position i */ + virtual uint rank0(uint i); + + /** Returns the number of ones until position i */ + virtual uint rank1(uint i); + + /** Returns the position of the i-th zero + * @return (uint)-1 if i=0, len if i>num_zeros or the position */ + virtual uint select0(uint i); + + /** Returns the position of the i-th one + * @return (uint)-1 if i=0, len if i>num_ones or the position */ + virtual uint select1(uint i); + + /** Returns the i-th bit */ + virtual bool access(uint i); + + /** Returns the size of the structure in bytes */ + virtual uint size(); + + /** Stores the bitmap given a file pointer, return 0 in case of success */ + virtual int save(FILE * fp); + + /** Reads the bitmap from a file pointer, returns NULL in case of error */ + static static_bitsequence_rrr02_light * load(FILE * fp); + + /** Creates a new sampling for the queries */ + void create_sampling(uint sampling_rate); + + /** Frees the space required by the table E, which is static and global + * to all instances. 
+ */ + static void delete_E() { + delete E; + } + +protected: + static_bitsequence_rrr02_light(); + /** Classes and offsets */ + uint *C, *O; + uint O_bits_len; + /** C and O samplings */ + uint *C_sampling, *O_pos; + /** Sample rate */ + uint sample_rate; + + static table_offset * E; +}; + +#endif /* _STATIC_BITSEQUENCE_RRR02_H */ diff --git a/src/static_bitsequence/static_bitsequence_sdarray.cpp b/src/static_bitsequence/static_bitsequence_sdarray.cpp new file mode 100644 index 0000000..8ce9164 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_sdarray.cpp @@ -0,0 +1,82 @@ + +#include + +static_bitsequence_sdarray::static_bitsequence_sdarray(uint * buff, uint len) { + uint * tmp_seq = new uint[uint_len(len,1)+1]; + ones = 0; + for(uint i=0;ilen = len; + //sd.lasti=(uint)-3; + //this->ones = sd.m; + delete [] tmp_seq; +} + + +static_bitsequence_sdarray::static_bitsequence_sdarray() {make___selecttbl();} + +static_bitsequence_sdarray::~static_bitsequence_sdarray() { + if(ones) + selects3_free(&sd); +} + + +uint static_bitsequence_sdarray::rank1(uint i) { + if(i>=len) return -1; + if(ones) + return selects3_rank(&sd,i); + else + return 0; +} + + +uint static_bitsequence_sdarray::select1(uint i) { + if(i>ones || i==0) return -1; + if(ones) + return selects3_select(&sd,i); + else + return (uint)-1; +} + + +uint static_bitsequence_sdarray::select_next1(uint i) { + return selects3_selectnext(&sd,i); +} + + +uint static_bitsequence_sdarray::size() { + return sizeof(static_bitsequence_sdarray)+(ones?(sd.size + sd.sd0->size + sd.sd1->size):0); +} + + +int static_bitsequence_sdarray::save(FILE * fp) { + uint wr = SDARRAY_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&ones,sizeof(uint),1,fp); + if(wr!=3 || (ones?(selects3_save(&sd,fp)):false)) + return 1; + return 0; +} + + +static_bitsequence_sdarray * static_bitsequence_sdarray::load(FILE * fp) { + uint id; + if(fread(&id,sizeof(uint),1,fp)!=1) return NULL; + 
if(id!=SDARRAY_HDR) return NULL; + static_bitsequence_sdarray * ret = new static_bitsequence_sdarray(); + id = fread(&ret->len,sizeof(uint),1,fp); + id += fread(&ret->ones,sizeof(uint),1,fp); + if(ret->ones && selects3_load(&ret->sd,fp)) { + delete ret; + return NULL; + } + return ret; +} diff --git a/src/static_bitsequence/static_bitsequence_sdarray.h b/src/static_bitsequence/static_bitsequence_sdarray.h new file mode 100644 index 0000000..f8944d4 --- /dev/null +++ b/src/static_bitsequence/static_bitsequence_sdarray.h @@ -0,0 +1,30 @@ + +#ifndef _STATIC_BITSEQUENCE_SDARRAY_H +#define _STATIC_BITSEQUENCE_SDARRAY_H + +#include +#include +#include + +class static_bitsequence_sdarray: public static_bitsequence { + public: + static_bitsequence_sdarray(uint * buff, uint len); + virtual ~static_bitsequence_sdarray(); + virtual uint select1(uint i); + virtual uint rank1(uint i); + virtual uint select_next1(uint i); + virtual uint size(); + virtual int save(FILE * fp); + static static_bitsequence_sdarray * load(FILE * fp); + + uint select_next1_unsafe(uint i){ + return selects3_selectnext(&sd,i); + }; + protected: + selects3 sd; + static_bitsequence_sdarray(); + +}; + +#endif + diff --git a/src/static_bitsequence/table_offset.cpp b/src/static_bitsequence/table_offset.cpp new file mode 100644 index 0000000..e2350ed --- /dev/null +++ b/src/static_bitsequence/table_offset.cpp @@ -0,0 +1,123 @@ +/* table_offset.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Table for offsets. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include "table_offset.h" + + +// Interface for old implementation +void genera(ushort * bch, uint u, ushort * F, uint lF); +uint generaClase(ushort * bch, uint u, uint clase, uint puestos, uint pos_ini, uint generado); +uint offset_func(uint u, uint busca); +uint offsetRecursivo(uint u, uint busca, uint clase, uint puestos, uint pos_ini, uint generado); +uint __indiceFunc; +uint __indAcumulado; +ushort * __Lis; +// End interface old implementation + + +table_offset::table_offset(uint u) { + this->u = u; + users_count = 0; + short_bitmaps = new ushort[((1< +#include + +//using namespace std; + +/** Universal table required for static_bitsequence_rrr02, Raman, Raman and Rao's [1] + * proposal for rank/select capable data structures, it achieves space nH_0, + * O(sample_rate) time for rank and O(log len) for select. The practial implementation + * is based on [2] + * + * [1] R. Raman, V. Raman and S. Rao. Succinct indexable dictionaries with applications + * to encoding $k$-ary trees and multisets. SODA02. + * [2] F. Claude and G. Navarro. Practical Rank/Select over Arbitrary Sequences. SPIRE08. + * + * @author Francisco Claude + */ +class table_offset { + +public: + /** builds a universal table, designed for u<=15 */ + table_offset(uint u); + ~table_offset(); + + /** Increments the counter of users for the table */ + inline void use() { + users_count++; + } + + /** Tells the object that the user is not going to need the table anymore. 
*/ + inline table_offset * unuse() { + users_count--; + if(!users_count) { + delete this; + return NULL; + } + return this; + } + + /** Computes binomial(n,k) for n,k<=u */ + inline uint get_binomial(uint n, uint k) { + return binomial[n][k]; + } + + /** Computes ceil(log2(binomial(n,k))) for n,k<=u */ + inline ushort get_log2binomial(uint n, uint k) { + return log2binomial[n][k]; + } + + /** Returns the bitmap represented by the given class and inclass offsets */ + inline ushort short_bitmap(uint class_offset, uint inclass_offset) { + if(class_offset==0) return 0; + if(class_offset==u) return (ushort)(((uint)1< +#include +#include + +int compare(const void *p1, const void *p2) { + return ((auxbwd *)p1)->key - ((auxbwd *)p2)->key; +} + + +perm createPerm(uint *elems, uint nelems, uint t, static_bitsequence_builder * bmb) { + perm P; + uint *b, *baux, nextelem, i, j, bptr, + aux, antbptr,nbwdptrs, elem,nbits, firstelem, cyclesize; + auxbwd *auxbwdptr; + P = new struct sperm; + P->elems = elems; + P->nelems = nelems; + P->nbits = bits(nelems-1); + nbits = bits(nelems-1); + P->t = t; + if (t==1) { + P->bwdptrs = new uint[uint_len(nelems,nbits)]; + assert(P->bwdptrs!=NULL); + P->nbwdptrs = nelems; + for (i=0; ibwdptrs, nbits, bg, i); + } + P->bmap = NULL; + } + else { + auxbwdptr = new auxbwd[(t+((int)ceil((double)nelems/t)))]; + assert(auxbwdptr!=NULL); + b = new uint[uint_len(nelems,1)]; + for(i=0;i= t) { + auxbwdptr[nbwdptrs].key = j; + auxbwdptr[nbwdptrs++].pointer = bptr; + antbptr = bptr; + bptr = j; + aux = 0; + bitset(b, j); + } + cyclesize++; + } + if (cyclesize >= t) { + auxbwdptr[nbwdptrs].key = nextelem; + auxbwdptr[nbwdptrs++].pointer = bptr; + bitset(b, nextelem); + } + } + } + qsort(auxbwdptr, nbwdptrs, sizeof(auxbwd), &compare); + aux = uint_len(nbwdptrs,P->nbits); + P->bwdptrs = new uint[aux]; + assert(P->bwdptrs!=NULL); + for(i=0;ibwdptrs[i] = 0; + P->nbwdptrs = nbwdptrs; + for (i = 0; i < nbwdptrs; i++) { + set_field(P->bwdptrs, nbits, i, 
auxbwdptr[i].pointer); + //if(i<5) + // printf(" %d ",get_field(P->bwdptrs,nbits,i)); + } + //printf("\n"); + P->bmap = bmb->build(b, nelems); + //delete [] P->bmap; + delete [] b; + delete [] (baux); + delete [] (auxbwdptr); + } + return P; +} + + +void destroyPerm(perm P) { + delete [] P->elems; + if (P->bmap) delete P->bmap; + delete [] P->bwdptrs; + delete P; +} + + +// Computes P-1[i] +uint inversePerm(perm P, uint i) { + uint j, elem; + if (P->t==1) { + j = get_field(P->bwdptrs,P->nbits,i); + } + else { + j = i; + while (((elem=get_field(P->elems,P->nbits,j)) != i)&&(!P->bmap->access(j))) + j = elem; + + if (elem != i) { + // follows the backward pointer + j = get_field(P->bwdptrs, P->nbits, P->bmap->rank1(j-1)); + while ((elem = get_field(P->elems,P->nbits,j))!= i) + j = elem; + } + } + return j; +} + + +// gets the ith element of a perm P + +uint getelemPerm(perm P, uint i) { + return get_field(P->elems, P->nbits, i); +} + + +uint savePerm(perm P, FILE *f) { + uint aux; + uint v; + + if (fwrite(&P->nelems,sizeof(uint),1,f) != 1) { + fprintf(stderr,"Error: Cannot write Permutation on file\n"); + exit(1); + } + + aux = uint_len(P->nelems,P->nbits); + if (fwrite(P->elems,sizeof(uint),aux,f) != aux) { + fprintf(stderr,"Error: Cannot write Permutation on file\n"); + exit(1); + } + + aux = ((P->nelems+W-1)/W); + + if (P->bmap) { + v=1; + if (fwrite(&v,sizeof(uint),1,f) != 1) { + fprintf(stderr,"Error: Cannot write Permutation on file\n"); + exit(1); + } + P->bmap->save(f); + } + else { + v=0; + if (fwrite(&v,sizeof(uint),1,f) != 1) { + fprintf(stderr,"Error: Cannot write Permutation on file\n"); + exit(1); + } + } + + if (fwrite(&P->nbwdptrs,sizeof(uint),1,f) != 1) { + fprintf(stderr,"Error: Cannot write Permutation on file\n"); + exit(1); + } + + aux = uint_len(P->nbwdptrs,P->nbits); + if (fwrite(P->bwdptrs,sizeof(uint),aux,f) != aux) { + fprintf(stderr,"Error: Cannot write Permutation on file\n"); + exit(1); + } + if (fwrite(&P->t,sizeof(uint),1,f) != 1) { + 
fprintf(stderr,"Error: Cannot write Permutation on file\n"); + exit(1); + } + return 0; +} + + +perm loadPerm(FILE *f) { + uint aux; + perm P; + uint v; + + P = new struct sperm; //(struct sperm*) malloc(sizeof(struct sperm)); + + if (fread(&P->nelems,sizeof(uint),1,f) != 1) { + fprintf(stderr,"Error: Cannot read Permutation from file\n"); + exit(1); + } + P->nbits = bits(P->nelems-1); + aux = uint_len(P->nelems,P->nbits); + P->elems = new uint[aux]; //(uint *)malloc(aux*sizeof(uint)); + + if (fread(P->elems,sizeof(uint),aux,f) != aux) { + fprintf(stderr,"Error: Cannot read Permutation from file\n"); + exit(1); + } + + if (fread(&v,sizeof(uint),1,f) != 1) { + fprintf(stderr,"Error: Cannot read Permutation from file\n"); + exit(1); + } + + if (v) { + P->bmap = static_bitsequence::load(f); + } + else P->bmap = NULL; + + if (fread(&P->nbwdptrs,sizeof(uint),1,f) != 1) { + fprintf(stderr,"Error: Cannot read Permutation from file\n"); + exit(1); + } + + aux = uint_len(P->nbwdptrs,P->nbits); + P->bwdptrs = new uint[aux]; //(uint*) malloc(aux*sizeof(uint)); + + if (fread(P->bwdptrs,sizeof(uint),aux,f) != aux) { + fprintf(stderr,"Error: Cannot read Permutation from file\n"); + exit(1); + } + + if (fread(&P->t,sizeof(uint),1,f) != 1) { + fprintf(stderr,"Error: Cannot read Permutation from file\n"); + exit(1); + } + + return P; +} + + +uint sizeofPerm(perm P) { + return sizeof(struct sperm) + + ((uint_len(P->nelems,P->nbits))*sizeof(uint)) + + ((P->bmap)?(P->bmap->size()):0) + + ((uint_len(P->nbwdptrs,P->nbits))*sizeof(uint)); +} diff --git a/src/static_permutation/perm.h b/src/static_permutation/perm.h new file mode 100755 index 0000000..20d0bf6 --- /dev/null +++ b/src/static_permutation/perm.h @@ -0,0 +1,88 @@ +/* perm.h + * Copyright (C) 2005, Diego Arroyuelo, all rights reserved. 
+ * + * Permutation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef PERMINCLUDED +#define PERMINCLUDED + +#include +#include +#include + +typedef struct sperm +{ + uint *elems; // elements of the permutation + uint nelems; // # of elements + static_bitsequence * bmap; // bitmap allowing rank() queries in O(1) time + uint *bwdptrs; // array of backward pointers + uint nbits; // log(nelems) + uint nbwdptrs; // # of backward pointers + uint t; +} *perm; + +typedef struct +{ + uint key; + uint pointer; +} auxbwd; + +/** Creates a permutation + * + * @author Diego Arroyuelo + */ +perm createPerm(uint *elems, uint nelems, uint t, static_bitsequence_builder * bmb); + +/** Gets the i-th element of the permutation + * + * @author Diego Arroyuelo + */ +uint getelemPerm(perm P, uint i); + +/** Destroys a permutation + * + * @author Diego Arroyuelo + */ +void destroyPerm(perm P); + +/** Get pi(i)^{-1} + * + * @author Diego Arroyuelo + */ +uint inversePerm(perm P, uint i); + +/** Saves a permutation + * + * @author Diego Arroyuelo + */ +uint savePerm(perm P, FILE *f); + +/** Loads a permutation + * + * @author Diego Arroyuelo + */ +perm loadPerm(FILE *f); + +/** Returns the size of the data structure + * + * @author Diego Arroyuelo + */ +uint sizeofPerm(perm P); + 
+#endif diff --git a/src/static_permutation/static_permutation.cpp b/src/static_permutation/static_permutation.cpp new file mode 100644 index 0000000..b47059b --- /dev/null +++ b/src/static_permutation/static_permutation.cpp @@ -0,0 +1,33 @@ +/* static_permutation.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Permutation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include + +static_permutation * static_permutation::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + fseek(fp,-sizeof(uint),SEEK_CUR); + switch(rd) { + case STATIC_PERMUTATION_MRRR_HDR: return static_permutation_mrrr::load(fp); + } + return NULL; +} diff --git a/src/static_permutation/static_permutation.h b/src/static_permutation/static_permutation.h new file mode 100644 index 0000000..7d8ffef --- /dev/null +++ b/src/static_permutation/static_permutation.h @@ -0,0 +1,49 @@ +/* static_permutation.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * Permutation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_PERMUTATION_H +#define _STATIC_PERMUTATION_H + +#include + +#define STATIC_PERMUTATION_MRRR_HDR 2 + +/** Base class for static permutations + * @author Francisco Claude + */ +class static_permutation { + public: + virtual ~static_permutation() {} + /** Computes the i-th element of the permutation */ + virtual uint pi(uint i)=0; + /** Computes the inverse of i */ + virtual uint rev_pi(uint i)=0; + /** Saves the permutation to fp, returns 0 in case of success */ + virtual uint save(FILE *fp)=0; + /** Returns the size of the permutation */ + virtual uint size()=0; + /** Loads a static_permutation from fp */ + static static_permutation * load(FILE *fp); +}; + +#include + +#endif diff --git a/src/static_permutation/static_permutation_builder.h b/src/static_permutation/static_permutation_builder.h new file mode 100644 index 0000000..2a860ad --- /dev/null +++ b/src/static_permutation/static_permutation_builder.h @@ -0,0 +1,40 @@ +/* static_permutation_builder.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * Permutation builder + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_PERMUTATION_BUILDER_H +#define _STATIC_PERMUTATION_BUILDER_H + +#include +#include + +/** Base class for static permutation builders + * @author Francisco Claude + */ +class static_permutation_builder { + public: + virtual ~static_permutation_builder() {} + /** Returns a new permutation build for perm */ + virtual static_permutation * build(uint * perm, uint len)=0; +}; + +#include + +#endif diff --git a/src/static_permutation/static_permutation_builder_mrrr.cpp b/src/static_permutation/static_permutation_builder_mrrr.cpp new file mode 100644 index 0000000..996beb4 --- /dev/null +++ b/src/static_permutation/static_permutation_builder_mrrr.cpp @@ -0,0 +1,35 @@ +/* static_permutation_builder_mrrr.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Permutation builder + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +static_permutation_builder_mrrr::static_permutation_builder_mrrr(uint t, static_bitsequence_builder * bmb) { + this->t = t; + this->bmb = bmb; +} + +static_permutation_builder_mrrr::~static_permutation_builder_mrrr() { + //delete bmb; +} + +static_permutation * static_permutation_builder_mrrr::build(uint * perm, uint len) { + return new static_permutation_mrrr(perm,len,t,bmb); +} diff --git a/src/static_permutation/static_permutation_builder_mrrr.h b/src/static_permutation/static_permutation_builder_mrrr.h new file mode 100644 index 0000000..051d7e2 --- /dev/null +++ b/src/static_permutation/static_permutation_builder_mrrr.h @@ -0,0 +1,46 @@ +/* static_permutation_builder_mrrr.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Permutation builder + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_PERMUTATION_BUILDER_MRRR_H +#define _STATIC_PERMUTATION_BUILDER_MRRR_H + +#include +#include +#include + +/** Base class for static permutation builders + * @author Francisco Claude + */ +class static_permutation_builder_mrrr : public static_permutation_builder { + public: + static_permutation_builder_mrrr(uint t, static_bitsequence_builder * bmb); + virtual ~static_permutation_builder_mrrr(); + /** Returns a new permutation build for perm */ + virtual static_permutation * build(uint * perm, uint len); + + protected: + uint t; + static_bitsequence_builder * bmb; +}; + +#include + +#endif diff --git a/src/static_permutation/static_permutation_mrrr.cpp b/src/static_permutation/static_permutation_mrrr.cpp new file mode 100644 index 0000000..7bb7a69 --- /dev/null +++ b/src/static_permutation/static_permutation_mrrr.cpp @@ -0,0 +1,62 @@ +/* static_permutation_mrrr.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Permutation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#include + +static_permutation_mrrr::static_permutation_mrrr(uint * elems, uint nelems, uint t, static_bitsequence_builder * bmb) { + permutation = createPerm(elems, nelems, t, bmb); +} + +static_permutation_mrrr::static_permutation_mrrr() { +} + +static_permutation_mrrr::~static_permutation_mrrr() { + destroyPerm(permutation); +} + +uint static_permutation_mrrr::size() { + return sizeof(static_permutation)+sizeofPerm(permutation); +} + +uint static_permutation_mrrr::pi(uint i) { + return getelemPerm(permutation,i); +} + +uint static_permutation_mrrr::rev_pi(uint i) { + return inversePerm(permutation,i); +} + +uint static_permutation_mrrr::save(FILE *fp) { + uint wr = STATIC_PERMUTATION_MRRR_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + if(wr!=1) return 1; + return savePerm(permutation,fp); +} + +static_permutation_mrrr * static_permutation_mrrr::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=STATIC_PERMUTATION_MRRR_HDR) return NULL; + static_permutation_mrrr * ret = new static_permutation_mrrr(); + ret->permutation = loadPerm(fp); + return ret; +} diff --git a/src/static_permutation/static_permutation_mrrr.h b/src/static_permutation/static_permutation_mrrr.h new file mode 100644 index 0000000..0f196a5 --- /dev/null +++ b/src/static_permutation/static_permutation_mrrr.h @@ -0,0 +1,51 @@ +/* static_permutation_mrrr.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Permutation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_PERMUTATION_MRRR_H +#define _STATIC_PERMUTATION_MRRR_H + +#include +#include +#include + +/** Wrapper for Diego Arroyuelo's implementation of Munro et al.'s permutations. + * @author Francisco Claude + */ +class static_permutation_mrrr : public static_permutation { + public: + static_permutation_mrrr(uint * elems, uint nelems, uint t, static_bitsequence_builder * bmb); + virtual ~static_permutation_mrrr(); + /** Computes the i-th element of the permutation */ + virtual uint pi(uint i); + /** Computes the inverse of i */ + virtual uint rev_pi(uint i); + /** Saves the permutation to fp, returns 0 in case of success */ + virtual uint save(FILE *fp); + /** Returns the size of the permutation */ + virtual uint size(); + /** Loads a static_permutation from fp */ + static static_permutation_mrrr * load(FILE *fp); + protected: + perm permutation; + static_permutation_mrrr(); +}; + +#endif diff --git a/src/static_sequence/static_sequence.cpp b/src/static_sequence/static_sequence.cpp new file mode 100644 index 0000000..bd86382 --- /dev/null +++ b/src/static_sequence/static_sequence.cpp @@ -0,0 +1,93 @@ +/* static_sequence.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * static_sequence definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +using std::max; +using std::min; +using std::cout; +using std::cin; +using std::endl; + + +static_sequence::static_sequence() {} +static_sequence::~static_sequence() {} +uint static_sequence::length() { return len; } + +uint static_sequence::count(uint s) { + return rank(s,len-1); +} + +static_sequence * static_sequence::load(FILE * fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + fseek(fp,-sizeof(uint),SEEK_CUR); + switch(rd) { + case WVTREE_HDR: return static_sequence_wvtree::load(fp); + case GMR_CHUNK_HDR: return static_sequence_gmr_chunk::load(fp); + case GMR_HDR: return static_sequence_gmr::load(fp); + case WVTREE_NOPTRS_HDR: return static_sequence_wvtree_noptrs::load(fp); + case BS_HDR: return static_sequence_bs::load(fp); + } + return NULL; +} + +uint static_sequence::select_next(uint c, uint i) { + return select(c,rank(c,i)+1); +} + +bool static_sequence::test(uint * seq, uint n) { + uint sigma = 0; + for(uint i=0;i0 && rank(seq[i],i-1)!=occ[seq[i]]-1) { + cout << "rank-1 failed!" << endl; + delete [] occ; + return false; + } + if(select(seq[i],occ[seq[i]])!=i) { + cout << "select failed!" 
<< endl; + cout << "select(" << seq[i] << "," << occ[seq[i]] << ")="< +#include +#include +using std::vector; +#define WVTREE_HDR 2 +#define GMR_CHUNK_HDR 3 +#define GMR_HDR 4 +#define WVTREE_NOPTRS_HDR 5 +#define BS_HDR 6 + +//using namespace std; + +/** Base class for static sequences, contains many abstract functions, so this can't + * be instantiated. + * + * @author Francisco Claude + */ +class static_sequence { + +public: + static_sequence(); + virtual ~static_sequence(); + + /** Returns the number of occurrences of c until position i */ + virtual uint rank(uint c, uint i)=0; + virtual uint rankLessThan(uint &i, uint j) + { + //assert(0); // Implemented only in static_sequence_wvtree + return -1; + } + + /** Returns the position of the i-th c + * @return (uint)-1 if i=0, len if i exceeds the number of cs */ + virtual uint select(uint c, uint i)=0; + virtual uint select_next(uint c, uint i); + + /** Returns the i-th element */ + virtual uint access(uint i)=0; + virtual uint access(uint i, uint &rank) + { + //assert(0); // Implemented only in static_sequence_wvtree + return -1; + } + + // Returns all elements from interval [i, j] such that + // their value is in [min, max]. 
+ virtual vector access(uint i, uint j, uint min, uint max) + { + //assert(0); // Implemented only in static_sequence_wvtree + return vector(); + } + + // Returns all elements from interval [i, j] + virtual vector accessAll(uint i, uint j) + { + //assert(0); // Implemented only in static_sequence_wvtree + return vector(); + } + + // Counts the number of elements in interval [i,j] such that + // their values are in [min,max] + virtual uint count(uint i, uint j, uint min, uint max) + { + //assert(0); // Implemented only in static_sequence_wvtree + return 0; + } + + /** Returns the length of the sequence */ + virtual uint length(); + + /** Returns how many cs are in the sequence */ + virtual uint count(uint c); + + /** Returns the size of the structure in bytes */ + virtual uint size()=0; + + /** Stores the bitmap given a file pointer, return 0 in case of success */ + virtual uint save(FILE * fp)=0; + + virtual bool test(uint * seq, uint n); + + /** Reads a bitmap determining the type */ + static static_sequence * load(FILE * fp); + +protected: + /** Length of the bitstring */ + uint len; + +}; + +#include +#include +#include +#include +#include + +#endif /* _STATIC_SEQUENCE_H */ diff --git a/src/static_sequence/static_sequence_bs.cpp b/src/static_sequence/static_sequence_bs.cpp new file mode 100644 index 0000000..f04325e --- /dev/null +++ b/src/static_sequence/static_sequence_bs.cpp @@ -0,0 +1,126 @@ + +#include +using std::max; +static_sequence_bs::static_sequence_bs(uint * seq, uint n, alphabet_mapper * am, static_bitsequence_builder * bmb) { + sigma = 0; + len = n; + this->am = am; + am->use(); + for(uint i=0;imap(seq[i])); + sigma++; + uint * occ = new uint[sigma+1]; + for(uint i=0;i<=sigma;i++) occ[i] = 0; + for(uint i=0;imap(seq[i])+1]++; + for(uint i=1;imap(seq[i])]++]=i; + bitmaps = new static_bitsequence*[sigma]; + uint * bm = new uint[uint_len(n,1)]; + uint pp=0; + for(uint i=0;ibuild(bm,len); + } + delete [] bm; + delete [] occ; + delete [] pos; +} + 
+static_sequence_bs::static_sequence_bs() { + len = 0; + sigma = 0; + bitmaps = NULL; + am = NULL; +} + +static_sequence_bs::~static_sequence_bs() { + if(bitmaps!=NULL) { + for(uint i=0;iunuse(); +} + +uint static_sequence_bs::rank(uint c, uint i) { + if(am->map(c)>=sigma) return (uint)-1; + return bitmaps[am->map(c)]->rank1(i); +} +/* +uint static_sequence_bs::select(uint c, uint i) { + if(am->map(c)>=sigma) return (uint)-1; + return bitmaps[am->map(c)]->select1(i); +} + +uint static_sequence_bs::select_next(uint c, uint i) { + if(am->map(c)>=sigma) return (uint)-1; + return bitmaps[am->map(c)]->select_next1(i); +} +*/ +uint static_sequence_bs::select(uint c, uint i) { + if(c>=sigma) return (uint)-1; + return bitmaps[c]->select1(i); +} +uint static_sequence_bs::select_next(uint c, uint i) { + if(c>=sigma) return (uint)-1; + return bitmaps[c]->select_next1(i); +} +uint static_sequence_bs::access(uint i) { + for(uint j=0;jaccess(i)) return am->unmap(j); + } + return (uint)-1; +} + +uint static_sequence_bs::size() { + uint size = sizeof(static_sequence_bs)+am->size(); + for(uint i=0;isize(); + return size; +} + +uint static_sequence_bs::save(FILE * fp) { + uint wr = BS_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&sigma,sizeof(uint),1,fp); + if(wr!=3) return 1; + for(uint i=0;isave(fp)) return 2; + if(am->save(fp)) return 3; + return 0; +} + +static_sequence_bs * static_sequence_bs::load(FILE * fp) { + uint rd = 0; + uint type = 0; + rd += fread(&type,sizeof(uint),1,fp); + static_sequence_bs * ret = new static_sequence_bs(); + rd += fread(&ret->len,sizeof(uint),1,fp); + rd += fread(&ret->sigma,sizeof(uint),1,fp); + if(rd!=3 || type != BS_HDR) { + delete ret; + return NULL; + } + ret->bitmaps = new static_bitsequence*[ret->sigma]; + for(uint i=0;isigma;i++) + ret->bitmaps[i] = NULL; + for(uint i=0;isigma;i++) + if((ret->bitmaps[i]=static_bitsequence::load(fp))==NULL) { + delete ret; + return NULL; + } + if((ret->am 
= alphabet_mapper::load(fp))==NULL) { + delete ret; + return NULL; + } + ret->am->use(); + return ret; +} + diff --git a/src/static_sequence/static_sequence_bs.h b/src/static_sequence/static_sequence_bs.h new file mode 100644 index 0000000..fe023d0 --- /dev/null +++ b/src/static_sequence/static_sequence_bs.h @@ -0,0 +1,71 @@ +/* static_sequence.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BS_H +#define _STATIC_SEQUENCE_BS_H + + +#include +#include +#include + +/** static_sequence represented using one bitmap per symbol, doesn't support efficient access + * + * @author Francisco Claude + */ +class static_sequence_bs : public static_sequence { + +public: + static_sequence_bs(uint * seq, uint n, alphabet_mapper * am, static_bitsequence_builder * bmb); + virtual ~static_sequence_bs(); + + virtual uint rank(uint c, uint i); + + virtual uint select(uint c, uint i); + uint select_next(uint c, uint i); + + virtual uint access(uint i); + + virtual uint size(); + + virtual uint save(FILE * fp); + + /** Reads a bitmap determining the type */ + static static_sequence_bs * load(FILE * fp); + + uint select_next_unsafe(uint c, uint i){ + 
static_bitsequence * bs = bitmaps[c]; + static_bitsequence_sdarray * sd = reinterpret_cast(bs); + return sd->select_next1_unsafe(i); + }; + +protected: + uint sigma; + static_bitsequence ** bitmaps; + alphabet_mapper * am; + + static_sequence_bs(); + +}; + + +#endif /* _STATIC_SEQUENCE_BS_H */ + diff --git a/src/static_sequence/static_sequence_builder.h b/src/static_sequence/static_sequence_builder.h new file mode 100644 index 0000000..6d0b340 --- /dev/null +++ b/src/static_sequence/static_sequence_builder.h @@ -0,0 +1,43 @@ +/* static_sequence_builder.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BUILDER_H +#define _STATIC_SEQUENCE_BUILDER_H + +#include +#include + +/** Abstract interface for static sequence builders: a concrete builder + * keeps its own construction parameters and turns a symbol array into a + * static_sequence through build(). + * @author Francisco Claude + */ +class static_sequence_builder { + public: + /** Virtual so that a concrete builder can be destroyed through a pointer to this base class */ + virtual ~static_sequence_builder() {} + /** Returns a new sequence built for seq. + * @param seq array of symbols to index + * @param len number of symbols of seq to use + * @return the new sequence; the builders in this directory create it with new, so the caller presumably has to delete it (confirm against callers) + */ + virtual static_sequence * build(uint * seq, uint len)=0; +}; + +#include +#include +#include +#include + +#endif diff --git a/src/static_sequence/static_sequence_builder_gmr.cpp b/src/static_sequence/static_sequence_builder_gmr.cpp new file mode 100644 index 0000000..b6781e1 --- /dev/null +++ b/src/static_sequence/static_sequence_builder_gmr.cpp @@ -0,0 +1,32 @@ +/* static_sequence_builder_gmr.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder gmr + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +/** Records the chunk length and the two builders that build() hands to every static_sequence_gmr it creates; only the pointers are kept, nothing is copied */ +static_sequence_builder_gmr::static_sequence_builder_gmr(uint chunk_length, static_bitsequence_builder *bmb, static_sequence_builder *ssb) + : bmb(bmb), ssb(ssb), chunk_length(chunk_length) { +} + +/** Creates a new static_sequence_gmr from seq and len using the stored chunk length and builders */ +static_sequence * static_sequence_builder_gmr::build(uint * seq, uint len) { + static_sequence * built = new static_sequence_gmr(seq,len,chunk_length,bmb,ssb); + return built; +} diff --git a/src/static_sequence/static_sequence_builder_gmr.h b/src/static_sequence/static_sequence_builder_gmr.h new file mode 100644 index 0000000..1588818 --- /dev/null +++ b/src/static_sequence/static_sequence_builder_gmr.h @@ -0,0 +1,44 @@ +/* static_sequence_builder_gmr.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder gmr + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BUILDER_GMR_H +#define _STATIC_SEQUENCE_BUILDER_GMR_H + +#include +#include +#include + +/** Builder for static_sequence_gmr objects. + * Derives from static_sequence_builder, like the other builders of this + * directory, so that it can be used wherever a static_sequence_builder * + * is expected. + * @author Francisco Claude + */ +class static_sequence_builder_gmr : public static_sequence_builder { + public: + /** @param chunk_length chunk length handed to every static_sequence_gmr that is built + * @param bmb bitmap builder handed to every static_sequence_gmr that is built (pointer stored, not copied) + * @param ssb sequence builder handed to every static_sequence_gmr that is built (pointer stored, not copied) + */ + static_sequence_builder_gmr(uint chunk_length, static_bitsequence_builder *bmb, static_sequence_builder *ssb); + /** Empty: bmb and ssb are not deleted here */ + virtual ~static_sequence_builder_gmr() {} + /** Returns a new static_sequence_gmr created from seq and len with the stored parameters */ + virtual static_sequence * build(uint * seq, uint len); + + protected: + static_bitsequence_builder *bmb; + static_sequence_builder *ssb; + uint chunk_length; +}; + +#endif diff --git a/src/static_sequence/static_sequence_builder_gmr_chunk.cpp b/src/static_sequence/static_sequence_builder_gmr_chunk.cpp new file mode 100644 index 0000000..3d3b758 --- /dev/null +++ b/src/static_sequence/static_sequence_builder_gmr_chunk.cpp @@ -0,0 +1,31 @@ +/* static_sequence_builder_gmr_chunk.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder gmr chunk + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +/** Records the bitmap builder and the permutation builder that build() hands to every static_sequence_gmr_chunk it creates; only the pointers are kept */ +static_sequence_builder_gmr_chunk::static_sequence_builder_gmr_chunk(static_bitsequence_builder *bmb, static_permutation_builder *pmb) + : bmb(bmb), pmb(pmb) { +} + +/** Creates a new static_sequence_gmr_chunk from seq, passing len as the chunk length */ +static_sequence * static_sequence_builder_gmr_chunk::build(uint * seq, uint len) { + static_sequence * built = new static_sequence_gmr_chunk(seq,len,bmb,pmb); + return built; +} diff --git a/src/static_sequence/static_sequence_builder_gmr_chunk.h b/src/static_sequence/static_sequence_builder_gmr_chunk.h new file mode 100644 index 0000000..6eccdc8 --- /dev/null +++ b/src/static_sequence/static_sequence_builder_gmr_chunk.h @@ -0,0 +1,44 @@ +/* static_sequence_builder_gmr_chunk.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder gmr chunk + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BUILDER_GMR_CHUNK_H +#define _STATIC_SEQUENCE_BUILDER_GMR_CHUNK_H + +#include +#include +#include +#include + +/** Builder for static_sequence_gmr_chunk objects: keeps the bitmap and + * permutation builders and passes them to each chunk it creates. + * @author Francisco Claude + */ +class static_sequence_builder_gmr_chunk : public static_sequence_builder { + public: + /** @param bmb bitmap builder passed to every chunk that is built (pointer stored, not copied) + * @param pmb permutation builder passed to every chunk that is built (pointer stored, not copied) + */ + static_sequence_builder_gmr_chunk(static_bitsequence_builder *bmb, static_permutation_builder *pmb); + /** Empty: bmb and pmb are not deleted here */ + virtual ~static_sequence_builder_gmr_chunk() {} + /** Returns a new static_sequence_gmr_chunk created from seq; len is passed as its chunk length */ + virtual static_sequence * build(uint * seq, uint len); + + protected: + static_bitsequence_builder *bmb; + static_permutation_builder *pmb; +}; + +#endif diff --git a/src/static_sequence/static_sequence_builder_wvtree.cpp b/src/static_sequence/static_sequence_builder_wvtree.cpp new file mode 100644 index 0000000..d539cdf --- /dev/null +++ b/src/static_sequence/static_sequence_builder_wvtree.cpp @@ -0,0 +1,32 @@ +/* static_sequence_builder_wvtree.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder wavelet tree + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +/** Records the coder, the bitmap builder and the alphabet mapper that build() hands to every static_sequence_wvtree it creates; only the pointers are kept */ +static_sequence_builder_wvtree::static_sequence_builder_wvtree(wt_coder * wc, static_bitsequence_builder *bmb, alphabet_mapper * am) + : am(am), wc(wc), bmb(bmb) { +} + +/** Creates a new static_sequence_wvtree from seq and len using the stored coder, bitmap builder and alphabet mapper */ +static_sequence * static_sequence_builder_wvtree::build(uint * seq, uint len) { + static_sequence * built = new static_sequence_wvtree(seq,len,wc,bmb,am); + return built; +} diff --git a/src/static_sequence/static_sequence_builder_wvtree.h b/src/static_sequence/static_sequence_builder_wvtree.h new file mode 100644 index 0000000..9c76be2 --- /dev/null +++ b/src/static_sequence/static_sequence_builder_wvtree.h @@ -0,0 +1,46 @@ +/* static_sequence_builder_wvtree.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder wavelet tree + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BUILDER_WVTREE_H +#define _STATIC_SEQUENCE_BUILDER_WVTREE_H + +#include +#include +#include +#include +#include + +/** Builder for wavelet trees: keeps a coder, a bitmap builder and an + * alphabet mapper and passes them to each static_sequence_wvtree it creates. + * @author Francisco Claude + */ +class static_sequence_builder_wvtree : public static_sequence_builder { + public: + /** @param wc coder passed to every wavelet tree that is built (pointer stored, not copied) + * @param bmb bitmap builder passed to every wavelet tree that is built (pointer stored, not copied) + * @param am alphabet mapper passed to every wavelet tree that is built (pointer stored, not copied) + */ + static_sequence_builder_wvtree(wt_coder * wc, static_bitsequence_builder *bmb, alphabet_mapper * am); + /** Empty: am, wc and bmb are not deleted here */ + virtual ~static_sequence_builder_wvtree() {} + /** Returns a new static_sequence_wvtree created from seq and len with the stored parameters */ + virtual static_sequence * build(uint * seq, uint len); + + protected: + alphabet_mapper * am; + wt_coder * wc; + static_bitsequence_builder *bmb; +}; + +#endif diff --git a/src/static_sequence/static_sequence_builder_wvtree_noptrs.cpp b/src/static_sequence/static_sequence_builder_wvtree_noptrs.cpp new file mode 100644 index 0000000..5f34970 --- /dev/null +++ b/src/static_sequence/static_sequence_builder_wvtree_noptrs.cpp @@ -0,0 +1,31 @@ +/* static_sequence_builder_wvtree_noptrs.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder wavelet tree without pointers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +/** Records the bitmap builder and the alphabet mapper that build() hands to every static_sequence_wvtree_noptrs it creates; only the pointers are kept */ +static_sequence_builder_wvtree_noptrs::static_sequence_builder_wvtree_noptrs(static_bitsequence_builder *bmb, alphabet_mapper * am) + : am(am), bmb(bmb) { +} + +/** Creates a new static_sequence_wvtree_noptrs from seq and len using the stored bitmap builder and alphabet mapper */ +static_sequence * static_sequence_builder_wvtree_noptrs::build(uint * seq, uint len) { + static_sequence * built = new static_sequence_wvtree_noptrs(seq,len,bmb,am); + return built; +} diff --git a/src/static_sequence/static_sequence_builder_wvtree_noptrs.h b/src/static_sequence/static_sequence_builder_wvtree_noptrs.h new file mode 100644 index 0000000..16bd22b --- /dev/null +++ b/src/static_sequence/static_sequence_builder_wvtree_noptrs.h @@ -0,0 +1,44 @@ +/* static_sequence_builder_wvtree_noptrs.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder wavelet tree without pointers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BUILDER_WVTREE_NOPTRS_H +#define _STATIC_SEQUENCE_BUILDER_WVTREE_NOPTRS_H + +#include +#include +#include +#include + +/** Builder for wavelet trees without pointers: keeps a bitmap builder and + * an alphabet mapper and passes them to each static_sequence_wvtree_noptrs + * it creates. + * @author Francisco Claude + */ +class static_sequence_builder_wvtree_noptrs : public static_sequence_builder { + public: + /** @param bmb bitmap builder passed to every sequence that is built (pointer stored, not copied) + * @param am alphabet mapper passed to every sequence that is built (pointer stored, not copied) + */ + static_sequence_builder_wvtree_noptrs(static_bitsequence_builder *bmb, alphabet_mapper * am); + /** Empty: am and bmb are not deleted here */ + virtual ~static_sequence_builder_wvtree_noptrs() {} + /** Returns a new static_sequence_wvtree_noptrs created from seq and len with the stored parameters */ + virtual static_sequence * build(uint * seq, uint len); + + protected: + alphabet_mapper * am; + static_bitsequence_builder *bmb; +}; + +#endif diff --git a/src/static_sequence/static_sequence_gmr.cpp b/src/static_sequence/static_sequence_gmr.cpp new file mode 100644 index 0000000..d78518f --- /dev/null +++ b/src/static_sequence/static_sequence_gmr.cpp @@ -0,0 +1,195 @@ +/* static_sequence_gmr.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * GMR + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +using std::max; +static_sequence_gmr::static_sequence_gmr(uint * sequence, uint n, uint chunk_length, static_bitsequence_builder * bmb, static_sequence_builder * ssb) { + len = n; + if(len%chunk_length) len+=chunk_length-len%chunk_length; + uint * new_seq = new uint[len]; + sigma = 0; + for(uint i=0;ichunk_length = chunk_length; + build(new_seq,bmb,ssb); + delete [] new_seq; +} + +static_sequence_gmr::static_sequence_gmr() { +} + +static_sequence_gmr::~static_sequence_gmr() { + delete B; + for (uint i=0;ibuild(sequence+i*chunk_length, chunk_length); + //cout << "1." << i << endl; cout.flush(); + assert(chunk[i]!=NULL); + } + uint * ones = get_ones(sequence); + uint *B_bitmap = new uint[(2+len+(unsigned long long)num_chunks*sigma)/W+1]; + assert(B_bitmap!=NULL); + for (uint i=0;i<(2+len+(unsigned long long)num_chunks*sigma)/W+1;i++) + B_bitmap[i] = 0; + uint pos=0; + for (unsigned long long i=0;i<(unsigned long long)num_chunks*sigma;i++) { + for (uint j=0;jselect0(bp); + uint prev = rank_pos-bp+1; + uint sum = B->rank1(B->select0(bp+i)) - prev; + uint cr = chunk[i]->rank(c,j-i*chunk_length); + /*if(c==0) { + cout << "c=" << c << " j=" << j << endl; + cout << "i=" << i << endl; + cout << "bp=" << bp << endl; + cout << "rank_pos=" << rank_pos << endl; + cout << "prev=" << prev << endl; + cout << "sum=" << sum << endl; + cout << "cr=" << cr << endl; + }*/ + return sum + cr; +} + + +uint static_sequence_gmr::select(uint c, uint j) { + c++; + uint rank_pos = B->select0(c*(len/chunk_length)); + uint prev = B->rank1(rank_pos); + uint sel = prev+j; + uint block = (B->select1(sel)); + uint i = block-sel+1; + uint desp = B->rank1(B->select0((i)))-prev; + if (desp+1==0) desp=0; + uint rchunk = i%(len/chunk_length); + /*if(j==90) { + cout 
<< "------------------------------" << endl; + cout << "c=" << c << " j=" << j << endl; + cout << "chunk_length=" << chunk_length << endl; + cout << "rank_pos=" << rank_pos << endl; + cout << "prev=" << prev << endl; + cout << "sel=" << sel << endl; + cout << "block=" << block << endl; + cout << "i=" << i << endl; + cout << "desp=" << desp << endl; + cout << "rchunk=" << rchunk << endl; + cout << "j-desp=" << j-desp << endl; + }*/ + return (rchunk*chunk_length)+chunk[rchunk]->select(c, j-desp); +} + + +uint static_sequence_gmr::access(uint j) { + return chunk[j/chunk_length]->access(j%chunk_length)-1; +} + + +uint static_sequence_gmr::size() { + uint s = 0; + for (uint i=0;isize(); + return s+B->size()+sizeof(static_sequence_gmr); +} + +uint static_sequence_gmr::save(FILE *fp) { + uint wr = GMR_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&sigma,sizeof(uint),1,fp); + wr += fwrite(&chunk_length,sizeof(uint),1,fp); + if(wr!=4) return 1; + if(B->save(fp)) return 1; + for(uint i=0;isave(fp)) return 1; + return 0; +} + +static_sequence_gmr * static_sequence_gmr::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=GMR_HDR) return NULL; + static_sequence_gmr * ret = new static_sequence_gmr(); + rd = fread(&ret->len,sizeof(uint),1,fp); + rd += fread(&ret->sigma,sizeof(uint),1,fp); + rd += fread(&ret->chunk_length,sizeof(uint),1,fp); + if(rd!=3) { + delete ret; + return NULL; + } + ret->B = static_bitsequence::load(fp); + if(ret->B==NULL) { + delete ret; + return NULL; + } + ret->chunk = new static_sequence*[ret->len/ret->chunk_length]; + for(uint i=0;ilen/ret->chunk_length;i++) { + ret->chunk[i] = static_sequence::load(fp); + if(ret->chunk[i]==NULL) { + delete ret; + return NULL; + } + } + return ret; +} diff --git a/src/static_sequence/static_sequence_gmr.h b/src/static_sequence/static_sequence_gmr.h new file mode 100644 index 0000000..5159dea --- /dev/null +++ 
b/src/static_sequence/static_sequence_gmr.h @@ -0,0 +1,56 @@ +/* static_sequence_gmr.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * GMR + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_GMR_H +#define _STATIC_SEQUENCE_GMR_H + +#include +#include +#include +#include +#include +#include +#include + +//using namespace std; + +/** static_sequence stored as an array of fixed-length chunks plus a bitmap B. + * Position j lives in chunk[j/chunk_length]; rank() and select() use B + * together with the per-chunk structures to answer queries. + */ +class static_sequence_gmr : public static_sequence { + public: + /** Builds the structure for the n symbols of sequence. + * The stored length is rounded up to a multiple of chunk_length. + * @param bmb bitmap builder, presumably used for B (confirm in build()) + * @param ssb sequence builder, presumably used for each chunk (confirm in build()) + */ + static_sequence_gmr(uint * sequence, uint n, uint chunk_length, static_bitsequence_builder * bmb, static_sequence_builder * ssb); + /** Overrides the virtual destructor of static_sequence */ + ~static_sequence_gmr(); + /** Returns the number of occurrences of c until position j */ + virtual uint rank(uint c, uint j); + /** Returns the position of the j-th c */ + virtual uint select(uint c, uint j); + /** Returns the j-th element */ + virtual uint access(uint j); + /** Returns the size of the structure in bytes: the chunks, B and this object */ + virtual uint size(); + /** Writes the header, len, sigma, chunk_length, B and every chunk to fp; returns 0 on success, 1 on failure */ + virtual uint save(FILE *fp); + /** Reads a structure written by save(); returns NULL if the header does not match or any part fails to load */ + static static_sequence_gmr * load(FILE *fp); + + protected: + /** Used by load(); leaves the members to be filled in by the caller */ + static_sequence_gmr(); + void build(uint * sequence, static_bitsequence_builder * bmb, static_sequence_builder * ssb); + uint * get_ones(uint * sequence); + + uint sigma, chunk_length; + /** One static_sequence per chunk */ + static_sequence ** chunk; + /** Bitmap queried by rank() and select() */ + static_bitsequence * B; +}; + +#endif diff --git a/src/static_sequence/static_sequence_gmr_chunk.cpp b/src/static_sequence/static_sequence_gmr_chunk.cpp new file mode 100644 index 0000000..5eb105e --- /dev/null +++ 
b/src/static_sequence/static_sequence_gmr_chunk.cpp @@ -0,0 +1,150 @@ +/* static_sequence_gmr_chunk.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * gmr_chunk + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "static_sequence_gmr_chunk.h" +using std::max; + +static_sequence_gmr_chunk::static_sequence_gmr_chunk(uint * sequence, uint chunk_length, static_bitsequence_builder *bmb, static_permutation_builder *pmb) { + sigma = 0; + for(uint i=0;ipermutation = pmb->build(pi,chunk_length); //createPerm(pi, chunk_length, t); + //cout << "a" << endl; cout.flush(); + assert(permutation!=NULL); + this->sigma = sigma; + this->len = chunk_length; + delete [] counter; +} + +static_sequence_gmr_chunk::static_sequence_gmr_chunk() { +} + +static_sequence_gmr_chunk::~static_sequence_gmr_chunk() { + delete X; + delete permutation; +} + + +uint static_sequence_gmr_chunk::access(uint j) { + uint invPerm = permutation->rev_pi(j); //inversePerm(permutation, j); + //cout << "invPerm=" << invPerm << endl; + uint rank_pos = X->select1(invPerm+1); + //cout << "rank_pos=" << rank_pos << endl; + uint ret = rank_pos - X->rank1(rank_pos);// - 1; + //cout << "ret = " << ret << endl; + return ret; +} + + +uint static_sequence_gmr_chunk::select(uint i, uint j) { + 
uint pos = X->select0(i+1) + j - i -1; + /*cout << "pos=" << pos << endl; + cout << "pos'=" << X->rank1(X->select0(i+1)+j) << endl; + cout << "perm_pos=" << permutation->pi(pos) << endl;*/ + return permutation->pi(pos); //getelemPerm(permutation, pos); +} + + +uint static_sequence_gmr_chunk::rank(uint i, uint j) { + uint ini = X->select0(i+1)-i; + uint ini_o = ini; + uint fin = X->select0(i+2); + if(finpi(ini) > j) return 0; + if(permutation->pi(ini) == j) return 1; + if(ini==fin) return 1; + while(ini < fin-1) { + uint med = (ini+fin)/2; + uint elem = permutation->pi(med); //getelemPerm(permutation, med); + if(elem >= j) fin = med; + else ini = med; + } + while(fin>ini_o && permutation->pi(fin)>j) fin--; + return fin-ini_o+1; +} + + +uint static_sequence_gmr_chunk::size() { + return sizeof(static_sequence_gmr_chunk)+permutation->size()+X->size(); +} + +uint static_sequence_gmr_chunk::save(FILE *fp) { + uint wr = GMR_CHUNK_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&sigma,sizeof(uint),1,fp); + if(wr!=3) return 1; + if(X->save(fp)) return 1; + if(permutation->save(fp)) return 1; + return 0; +} + +static_sequence_gmr_chunk * static_sequence_gmr_chunk::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=GMR_CHUNK_HDR) return NULL; + static_sequence_gmr_chunk * ret = new static_sequence_gmr_chunk(); + rd = fread(&ret->len,sizeof(uint),1,fp); + rd += fread(&ret->sigma,sizeof(uint),1,fp); + ret->X = static_bitsequence::load(fp); + ret->permutation = static_permutation::load(fp); + if(rd!=2 || ret->X==NULL || ret->permutation==NULL) { + /*cout << "rd=" << rd << endl; + cout << "X =" << ret->X << endl; + cout << "P =" << ret->permutation << endl;*/ + delete ret; + return NULL; + } + return ret; +} diff --git a/src/static_sequence/static_sequence_gmr_chunk.h b/src/static_sequence/static_sequence_gmr_chunk.h new file mode 100644 index 0000000..8e7a11a --- /dev/null +++ 
b/src/static_sequence/static_sequence_gmr_chunk.h @@ -0,0 +1,71 @@ +/* static_sequence_gmr_chunk.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * gmr_chunk + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_GMR_CHUNK_H +#define _STATIC_SEQUENCE_GMR_CHUNK_H + +#include +#include +#include +#include +#include +#include +#include +#include + +//using namespace std; + +/** Implementation of the Chunk of Golynski et al's rank/select + * data structure [1]. + * + * [1] A. Golynski and I. Munro and S. Rao. + * Rank/select operations on large alphabets: a tool for text indexing. + * SODA 06. 
+ *
+ * @author Francisco Claude
+ */
+class static_sequence_gmr_chunk: public static_sequence {
+  public:
+    /** Builds the structures needed for the chunk */
+    static_sequence_gmr_chunk(uint * sequence, uint chunk_length, static_bitsequence_builder *bmb, static_permutation_builder *pmb);
+
+    /** Destroy the chunk */
+    ~static_sequence_gmr_chunk();
+
+    virtual uint access(uint j);
+    virtual uint select(uint i, uint j);
+    virtual uint rank(uint i, uint j);
+    /** Returns sizeof(static_sequence_gmr_chunk) plus the sizes reported
+     * by the permutation and by the bitmap X */
+    virtual uint size();
+    /** Writes the chunk header, len and sigma, followed by X and the
+     * permutation, to fp. Returns 0 on success and 1 on any failure */
+    virtual uint save(FILE *fp);
+    /** Reads a chunk written by save(). Returns NULL when the header does
+     * not match or when any field or component cannot be read */
+    static static_sequence_gmr_chunk * load(FILE *fp);
+
+  protected:
+    /** Bitmap */
+    static_bitsequence * X;
+    /** Permutation */
+    static_permutation * permutation;
+    /** Size of the alphabet */
+    uint sigma;
+    /* The chunk length has no field of its own (the declaration below is
+     * disabled); save() and load() store and restore len instead. */
+    //uint chunk_length;
+    /** Empty constructor; load() uses it and fills the members itself */
+    static_sequence_gmr_chunk();
+};
+#endif
diff --git a/src/static_sequence/static_sequence_wvtree.cpp b/src/static_sequence/static_sequence_wvtree.cpp
new file mode 100644
index 0000000..ca93594
--- /dev/null
+++ b/src/static_sequence/static_sequence_wvtree.cpp
@@ -0,0 +1,150 @@
+/* static_sequence_wvtree.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_sequence_wvtree definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +static_sequence_wvtree::static_sequence_wvtree(uint * symbols, uint n, wt_coder * c, static_bitsequence_builder * bmb, alphabet_mapper * am) { + this->n = n; + for(uint i=0;imap(symbols[i]); + this->am = am; + am->use(); + this->c=c; + c->use(); + root = new wt_node_internal(symbols, n, 0, c, bmb); + for(uint i=0;iunmap(symbols[i]); +} + +static_sequence_wvtree::static_sequence_wvtree(uchar * symbols, uint n, wt_coder * c, static_bitsequence_builder * bmb, alphabet_mapper * am) { + this->n = n; + for(uint i=0;imap((uint)symbols[i]); + this->am = am; + am->use(); + this->c=c; + c->use(); + uint *done = new uint[n/W+1]; + for (uint i = 0; i < n/W+1; i++) + done[i] = 0; + root = new wt_node_internal(symbols, n, 0, c, bmb, 0, done); + delete [] done; + delete [] symbols; + symbols = 0; // Already deleted! +// for(uint i=0;iunmap((uint)symbols[i]); +} + +static_sequence_wvtree::static_sequence_wvtree() {} + +static_sequence_wvtree::~static_sequence_wvtree() { + delete root; + am->unuse(); + c->unuse(); +} + +uint static_sequence_wvtree::rank(uint symbol, uint pos) { + return root->rank(am->map(symbol), pos, 0, c); +} + +uint static_sequence_wvtree::rankLessThan(uint &symbol, uint pos) { + uint s = am->map(symbol); +// std::cout << "lessthan..." 
<< std::endl; + uint r = root->rankLessThan(s, pos); + symbol = am->unmap(s); + return r; +} + + +uint static_sequence_wvtree::count(uint s) { + return root->rank(am->map(s), len-1, 0, c); +} + +uint static_sequence_wvtree::select(uint symbol, uint pos) { + uint ret = root->select(am->map(symbol), pos, 0, c); + if(ret==((uint)-1)) return (uint)-1; + return ret-1; +} + +uint static_sequence_wvtree::access(uint pos) { + return am->unmap(root->access(pos)); +} + +vector static_sequence_wvtree::access(uint i, uint j, uint min, uint max) +{ + vector resultSet; + root->access(resultSet, i, j, am->map(min), am->map(max), c->depth()-1, 0); + for (vector::iterator it = resultSet.begin(); it != resultSet.end(); ++it) + *it = am->unmap(*it); + return resultSet; +} + +vector static_sequence_wvtree::accessAll(uint i, uint j) +{ + vector resultSet; + if (j < i) + return resultSet; + + // resultSet.reserve(j-i+1); // avoid reallocation + root->access(resultSet, i, j); + for (vector::iterator it = resultSet.begin(); it != resultSet.end(); ++it) + *it = am->unmap(*it); + return resultSet; +} + +uint static_sequence_wvtree::count(uint i, uint j, uint min, uint max) +{ + return root->access(i, j, am->map(min), am->map(max), c->depth()-1, 0); +} + + +uint static_sequence_wvtree::size() { + /*cout << "WT: " << root->size() << endl; + cout << "Coder: " << c->size() << endl; + cout << "AM: " << am->size() << endl;*/ + return sizeof(static_sequence_wvtree)+sizeof(uint)+root->size()+am->size()+c->size(); +} + +uint static_sequence_wvtree::save(FILE * fp) { + uint wr = WVTREE_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + if(wr!=1) return 1; + wr = fwrite(&n,sizeof(uint),1,fp); + if(wr!=1) return 1; + if(c->save(fp)) return 1; + if(am->save(fp)) return 1; + if(root->save(fp)) return 1; + return 0; +} + +static_sequence_wvtree * static_sequence_wvtree::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=WVTREE_HDR) return NULL; + static_sequence_wvtree * ret 
= new static_sequence_wvtree(); + if(fread(&ret->n,sizeof(uint),1,fp)!=1) return NULL; + ret->c = wt_coder::load(fp); + ret->c->use(); + ret->am = alphabet_mapper::load(fp); + ret->am->use(); + ret->root = wt_node::load(fp); + return ret; +} diff --git a/src/static_sequence/static_sequence_wvtree.h b/src/static_sequence/static_sequence_wvtree.h new file mode 100644 index 0000000..6a112fb --- /dev/null +++ b/src/static_sequence/static_sequence_wvtree.h @@ -0,0 +1,97 @@ +/* static_sequence_wvtree.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_wvtree definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef STATIC_SEQUENCE_WVTREE_H +#define STATIC_SEQUENCE_WVTREE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//using namespace std; + +/** Wavelet tree implementation using pointers. 
+ * + * @author Francisco Claude + */ +class static_sequence_wvtree : public static_sequence { + public: + + /** Builds a Wavelet Tree for the string + * pointed by symbols assuming its length + * equals n */ + static_sequence_wvtree(uint * symbols, uint n, wt_coder * coder, static_bitsequence_builder * bmb, alphabet_mapper * am); + + static_sequence_wvtree(uchar * symbols, uint n, wt_coder * coder, static_bitsequence_builder * bmb, alphabet_mapper * am); + + virtual ~static_sequence_wvtree(); + + virtual uint rank(uint symbol, uint pos); + virtual uint rankLessThan(uint &symbol, uint pos); + + virtual uint select(uint symbol, uint i); + + virtual uint access(uint pos); + virtual uint access(uint pos, uint &rank) + { + return root->access(pos, rank); + } + + // Returns all elements from interval [i, j] such that + // their value is in [min, max]. + virtual vector access(uint i, uint j, uint min, uint max); + virtual vector accessAll(uint i, uint j); + virtual uint count(uint i, uint j, uint min, uint max); + + virtual uint count(uint s); + + virtual uint size(); + + virtual uint save(FILE * fp); + static static_sequence_wvtree * load(FILE *fp); + + protected: + + static_sequence_wvtree(); + + wt_node * root; + wt_coder * c; + alphabet_mapper * am; + //bitmap_builder * bmb; + + /** Length of the string. */ + uint n; + + /** Height of the Wavelet Tree. */ + uint max_v; + + /** Flag for testing for correcteness. */ + bool test; + + +}; +#endif /* _STATIC_SEQUENCE_WVTREE_H */ diff --git a/src/static_sequence/static_sequence_wvtree_noptrs.cpp b/src/static_sequence/static_sequence_wvtree_noptrs.cpp new file mode 100644 index 0000000..18ddcbd --- /dev/null +++ b/src/static_sequence/static_sequence_wvtree_noptrs.cpp @@ -0,0 +1,563 @@ +/* static_sequence_wvtree_noptrs.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * static_sequence_wvtree_noptrs definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +using std::min; +using std::max; +static_sequence_wvtree_noptrs::static_sequence_wvtree_noptrs(uint * symbols, uint n, static_bitsequence_builder * bmb, alphabet_mapper * am, bool deleteSymbols) { + this->n=n; + this->am=am; + am->use(); + for(uint i=0;imap(symbols[i]); + max_v=max_value(symbols,n); + height=bits(max_v); + uint *occurrences=new uint[max_v+1]; + for(uint i=0;i<=max_v;i++) occurrences[i]=0; + for(uint i=0;ibuild(oc,new_n+1); + delete [] occurrences; + this->n = new_n; + uint ** _bm=new uint*[height]; + for(uint i=0;ibuild(_bm[i],new_n); + delete [] _bm[i]; + } + delete [] _bm; + + if (!deleteSymbols) + for(uint i=0;iunmap(symbols[i]); + +// delete [] new_symb; // already deleted in build_level()! 
+ delete [] oc; +} + +// symbols is an array of elements of "width" bits +static_sequence_wvtree_noptrs::static_sequence_wvtree_noptrs(uint * symbols, uint n, unsigned width, static_bitsequence_builder * bmb, alphabet_mapper * am, bool deleteSymbols) { + this->n=n; + this->am=am; + am->use(); + for(uint i=0;imap(get_field(symbols, width, i))); + max_v=max_value(symbols, width, n); + height=bits(max_v); + uint *occurrences=new uint[max_v+1]; + for(uint i=0;i<=max_v;i++) occurrences[i]=0; + for(uint i=0;ibuild(oc,new_n+1); + delete [] occurrences; + this->n = new_n; + uint ** _bm=new uint*[height]; + for(uint i=0;ibuild(_bm[i],new_n); + delete [] _bm[i]; + } + delete [] _bm; + + if (!deleteSymbols) + for(uint i=0;iunmap(get_field(symbols, width, i))); + +// delete [] new_symb; // already deleted in build_level()! + delete [] oc; +} + +static_sequence_wvtree_noptrs::static_sequence_wvtree_noptrs() { +} + +static_sequence_wvtree_noptrs::~static_sequence_wvtree_noptrs() { + for(uint i=0;iunuse(); +} + +uint static_sequence_wvtree_noptrs::save(FILE *fp) { + uint wr = WVTREE_NOPTRS_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&n,sizeof(uint),1,fp); + wr += fwrite(&max_v,sizeof(uint),1,fp); + wr += fwrite(&height,sizeof(uint),1,fp); + if(wr!=4) return 1; + if(am->save(fp)) return 1; + for(uint i=0;isave(fp)) return 1; + if(occ->save(fp)) return 1; + return 0; +} + +static_sequence_wvtree_noptrs * static_sequence_wvtree_noptrs::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=WVTREE_NOPTRS_HDR) return NULL; + static_sequence_wvtree_noptrs * ret = new static_sequence_wvtree_noptrs(); + rd = fread(&ret->n,sizeof(uint),1,fp); + rd += fread(&ret->max_v,sizeof(uint),1,fp); + rd += fread(&ret->height,sizeof(uint),1,fp); + if(rd!=3) { + delete ret; + return NULL; + } + ret->am = alphabet_mapper::load(fp); + if(ret->am==NULL) { + delete ret; + return NULL; + } + ret->am->use(); + ret->bitstring = new 
static_bitsequence*[ret->height]; + for(uint i=0;iheight;i++) { + ret->bitstring[i] = static_bitsequence::load(fp); + if(ret->bitstring[i]==NULL){ + delete ret; + return NULL; + } + } + ret->occ = static_bitsequence::load(fp); + if(ret->occ==NULL) { + delete ret; + return NULL; + } + return ret; +} + +uint static_sequence_wvtree_noptrs::access(uint pos) { + uint level=0; + uint ret=0; + uint start=0; + uint end=n-1; + while(level=start && pos<=end); + if(bitstring[level]->access(pos)) { + ret=set(ret,level); + pos=bitstring[level]->rank1(pos-1)-bitstring[level]->rank1(start-1); + start=(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1)); + start=end-start+1; + pos+=start; + } + else { + pos=pos-start-(bitstring[level]->rank1(pos)-bitstring[level]->rank1(start-1)); + end=end-start-(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1)); + end+=start; + pos+=start; + } + level++; + } + return am->unmap(ret); +} + +uint static_sequence_wvtree_noptrs::rank(uint symbol, uint pos) { + symbol = am->map(symbol); + uint level=0; + uint start=0; + uint end=n-1; + uint count=0; + while(levelrank1(pos)-bitstring[level]->rank1(start-1)-1; + count=pos+1; + start=(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1)); + start=end-start+1; + pos+=start; + } + else { + pos=pos-start+bitstring[level]->rank1(start-1)-bitstring[level]->rank1(pos); + count=pos+1; + end=end-start-(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1)); + end+=start; + pos+=start; + } + level++; + if(count==0) return 0; + } + return count; +} + +vector static_sequence_wvtree_noptrs::access(uint i, uint j, uint min, uint max) +{ + vector resultSet; +// cout << "height = " << height << endl; + access(resultSet, i, j, am->map(min), am->map(max), 0, 0, 0, n-1); + return resultSet; +} + +void static_sequence_wvtree_noptrs::access(vector &result, uint i, uint j, uint min, uint max, uint l, uint pivot, uint start, uint end) +{ + uint symbol = pivot | (1 << (height-l-1)); + 
//std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << min << ", " << max << "], [" << start << ", " << end << "], symbol = " << symbol << std::endl; + + if (l == height) + { + if (i <= j && pivot >= min && pivot <= max && start <= end) + result.push_back(am->unmap((int)pivot)); + return; + } + + if (j < i || max < min || end < start) + return; + + if (min < symbol) + { + // Recurse left + uint newi = i + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(i-1); + uint newend = end - (bitstring[l]->rank1(end) - bitstring[l]->rank1(start-1)); + uint newj = j + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(j) + 1; + + uint newmax = max < symbol - 1 ? max : symbol - 1; + if (newj > start) + access(result, newi, newj-1, min, newmax, l+1, pivot, start, newend); + } + + if (max >= symbol) + { + // Recurse right + uint newstart = (bitstring[l]->rank1(end)-bitstring[l]->rank1(start-1)); + newstart = end - newstart + 1; + uint newi = bitstring[l]->rank1(i-1)-bitstring[l]->rank1(start-1) + newstart; + uint newj = bitstring[l]->rank1(j)-bitstring[l]->rank1(start-1) + newstart; + + uint newmin = min > symbol ? 
min : symbol; + if (newj > newstart) + access(result, newi, newj-1, newmin, max, l+1, symbol, newstart, end); + } +} + + +vector static_sequence_wvtree_noptrs::accessAll(uint i, uint j) +{ + vector resultSet; + if (j < i) + return resultSet; + + resultSet.reserve(j-i+1); + accessAll(resultSet, i, j, 0, 0, 0, n-1); + return resultSet; +} + +void static_sequence_wvtree_noptrs::accessAll(vector &result, uint i, uint j, uint l, uint pivot, uint start, uint end) +{ + uint symbol = pivot | (1 << (height-l-1)); +// std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << start << ", " << end << "], symbol = " << symbol << std::endl; + + if (l == height) + { + if (i <= j && start <= end) + result.push_back(am->unmap((int)pivot)); + return; + } + + if (j < i || end < start) + return; + + { + // Recurse left + uint newi = i + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(i-1); + uint newend = end - (bitstring[l]->rank1(end) - bitstring[l]->rank1(start-1)); + uint newj = j + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(j) + 1; + + if (newj > start) + accessAll(result, newi, newj-1, l+1, pivot, start, newend); + } + + { + // Recurse right + uint newstart = (bitstring[l]->rank1(end)-bitstring[l]->rank1(start-1)); + newstart = end - newstart + 1; + uint newi = bitstring[l]->rank1(i-1)-bitstring[l]->rank1(start-1) + newstart; + uint newj = bitstring[l]->rank1(j)-bitstring[l]->rank1(start-1) + newstart; + + if (newj > newstart) + accessAll(result, newi, newj-1, l+1, symbol, newstart, end); + } +} + + +uint static_sequence_wvtree_noptrs::count(uint i, uint j, uint min, uint max) +{ + return count(i, j, am->map(min), am->map(max), 0, 0, 0, n-1); +} + +uint static_sequence_wvtree_noptrs::count(uint i, uint j, uint min, uint max, uint l, uint pivot, uint start, uint end) +{ + uint symbol = pivot | (1 << (height-l-1)); + //std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << min << ", " << max << "], [" << start << ", " << end << "], symbol = " << 
symbol << std::endl; + + if (l == height) + { + if (i <= j && pivot >= min && pivot <= max && start <= end) + return 1; + return 0; + } + + if (j < i || max < min || end < start) + return 0; + + uint result = 0; + if (min < symbol) + { + // Recurse left + uint newi = i + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(i-1); + uint newend = end - (bitstring[l]->rank1(end) - bitstring[l]->rank1(start-1)); + uint newj = j + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(j) + 1; + + uint newmax = max < symbol - 1 ? max : symbol - 1; + if (newj > start) + result += count(newi, newj-1, min, newmax, l+1, pivot, start, newend); + } + + if (max >= symbol) + { + // Recurse right + uint newstart = (bitstring[l]->rank1(end)-bitstring[l]->rank1(start-1)); + newstart = end - newstart + 1; + uint newi = bitstring[l]->rank1(i-1)-bitstring[l]->rank1(start-1) + newstart; + uint newj = bitstring[l]->rank1(j)-bitstring[l]->rank1(start-1) + newstart; + + uint newmin = min > symbol ? min : symbol; + if (newj > newstart) + result += count(newi, newj-1, newmin, max, l+1, symbol, newstart, end); + } + return result; +} + + + +inline uint get_start(uint symbol, uint mask) { + return symbol&mask; +} + +inline uint get_end(uint symbol, uint mask) { + return get_start(symbol,mask)+!mask+1; +} + +uint static_sequence_wvtree_noptrs::select(uint symbol, uint j) { + symbol = am->map(symbol); + uint mask = (1<select1(start)+1); + end = occ->select1(end+1)-1; + if(is_set(symbol,level)) { + uint ones_start = bitstring[level]->rank1(start-1); + pos = bitstring[level]->select1(ones_start+pos)-start+1; + } + else { + uint ones_start = bitstring[level]->rank1(start-1); + pos = bitstring[level]->select0(start-ones_start+pos)-start+1; + } + mask <<=1; + sum <<=1; + if(level==0) break; + level--; + } + return pos-1; +} + +uint static_sequence_wvtree_noptrs::size() { + uint ptrs = sizeof(static_sequence_wvtree_noptrs)+height*sizeof(static_sequence*); + uint bytesBitstrings = 0; + for(uint i=0;isize(); 
+ return bytesBitstrings+occ->size()+ptrs; +} + +void static_sequence_wvtree_noptrs::build_level(uint **bm, uint *symbols, uint level, uint length, uint offset) { + if(level==height) + { + delete [] symbols; + return; + } + uint cleft=0; + for(uint i=0;i>= 1; + } + return ret; +} + +bool static_sequence_wvtree_noptrs::is_set(uint val, uint ind) { + assert(ind +#include +#include +#include +#include +#include +#include + +//using namespace std; + +class static_sequence_wvtree_noptrs : public static_sequence { + public: + + /** Builds a Wavelet Tree for the string + * pointed by symbols assuming its length + * equals n and uses bmb to build the bitsequence */ + static_sequence_wvtree_noptrs(uint * symbols, uint n, static_bitsequence_builder * bmb, alphabet_mapper * am, bool deleteSymbols = false); + + // symbols is an array of elements of "width" bits. + static_sequence_wvtree_noptrs(uint * symbols, uint n, unsigned width, static_bitsequence_builder * bmb, alphabet_mapper * am, bool deleteSymbols = false); + + /** Destroys the Wavelet Tree */ + virtual ~static_sequence_wvtree_noptrs(); + + virtual uint rank(uint symbol, uint pos); + virtual uint select(uint symbol, uint i); + virtual uint access(uint pos); + virtual uint size(); + + virtual vector access(uint i, uint j, uint min, uint max); + virtual vector accessAll(uint i, uint j); + virtual uint count(uint i, uint j, uint min, uint max); + + virtual uint save(FILE *fp); + static static_sequence_wvtree_noptrs * load(FILE *fp); + + protected: + void access(vector &result, uint i, uint j, uint min, uint max, uint l, uint pivot, uint start, uint end); + void accessAll(vector &result, uint i, uint j, uint l, uint pivot, uint start, uint end); + uint count(uint i, uint j, uint min, uint max, uint l, uint pivot, uint start, uint end); + + static_sequence_wvtree_noptrs(); + + alphabet_mapper * am; + /** Only one bit-string for the Wavelet Tree. */ + static_bitsequence **bitstring, *occ; + + /** Length of the string. 
*/ + uint n; + + /** Height of the Wavelet Tree. */ + uint height,max_v; + + /** Obtains the maximum value from the string + * symbols of length n */ + uint max_value(uint * symbols, uint n); + uint max_value(uint * symbols, unsigned width, uint n); + + /** How many bits are needed to represent val */ + uint bits(uint val); + + /** Returns true if val has its ind-th bit set + * to one. */ + bool is_set(uint val, uint ind); + + /** Sets the ind-th bit in val */ + uint set(uint val, uint ind); + + /** Recursive function for building the Wavelet Tree. */ + void build_level(uint **bm, uint *symbols, uint level, uint length, uint offset); + void build_level(uint **bm, uint *symbols, unsigned width, uint level, uint length, uint offset); +}; +#endif diff --git a/src/static_sequence/wt_coder.cpp b/src/static_sequence/wt_coder.cpp new file mode 100644 index 0000000..4735ee5 --- /dev/null +++ b/src/static_sequence/wt_coder.cpp @@ -0,0 +1,46 @@ +/* wt_coder.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * wt_coder definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +wt_coder::wt_coder() { + user_count=0; +} + +void wt_coder::use() { + user_count++; +} + +void wt_coder::unuse() { + user_count--; + if(user_count==0) delete this; +} + +wt_coder * wt_coder::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + fseek(fp,-sizeof(uint),SEEK_CUR); + switch(rd) { + case WT_CODER_HUFF_HDR: return wt_coder_huff::load(fp); + case WT_CODER_BINARY_HDR: return wt_coder_binary::load(fp); + } + return NULL; +} diff --git a/src/static_sequence/wt_coder.h b/src/static_sequence/wt_coder.h new file mode 100644 index 0000000..c5dc91e --- /dev/null +++ b/src/static_sequence/wt_coder.h @@ -0,0 +1,64 @@ +/* wt_coder.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * wt_coder definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef wt_coder_h
+#define wt_coder_h
+
+/* NOTE(review): the header names of the #include lines in this file are
+ * missing from this copy of the patch - restore them from the repository. */
+#include
+#include
+
+//using namespace std;
+
+/* Header words identifying the concrete coder in a saved file;
+ * wt_coder::load() dispatches on them. */
+#define WT_CODER_HUFF_HDR 2
+#define WT_CODER_BINARY_HDR 3
+
+/** Coder that defines the shape of a wavelet tree
+ *
+ * @author Francisco Claude
+ */
+class wt_coder {
+  public:
+    /** Creates a coder with a user count of zero */
+    wt_coder();
+    /** Increments the user count */
+    virtual void use();
+    /** Decrements the user count; the coder deletes itself when the
+     * count reaches zero */
+    virtual void unuse();
+    virtual ~wt_coder() {};
+    /** Tells if at level l the symbol is represented by a one or a zero */
+    virtual bool is_set(uint symbol, uint l)=0;
+    /** Tells if the path of symbol becomes unique at level l */
+    virtual bool done(uint symbol, uint l)=0;
+    /** Returns the size of the coder */
+    virtual uint size()=0;
+    /** Returns the depth of the tree; this default returns (uint)-1 */
+    virtual uint depth() {
+      return -1; // Implemented in wt_coder_binary
+    }
+    /** Saves the coder to a file, returns 0 in case of success */
+    virtual uint save(FILE *fp)=0;
+    /** Loads a coder from a file, returns NULL in case of error */
+    static wt_coder * load(FILE *fp);
+  protected:
+    /** Number of holders registered through use() */
+    uint user_count;
+};
+
+#include
+#include
+
+#endif
diff --git a/src/static_sequence/wt_coder_binary.cpp b/src/static_sequence/wt_coder_binary.cpp
new file mode 100644
index 0000000..a49efc8
--- /dev/null
+++ b/src/static_sequence/wt_coder_binary.cpp
@@ -0,0 +1,74 @@
+/* wt_coder_binary.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_coder_binary definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +using std::min; +using std::max; +wt_coder_binary::wt_coder_binary(uint * seq, uint n, alphabet_mapper * am) { + uint max_v = 0; + for(uint i=0;imap(seq[i]),max_v); + h=bits(max_v); +} + +wt_coder_binary::wt_coder_binary(uchar * seq, uint n, alphabet_mapper * am) { + uint max_v = 0; + for(uint i=0;imap((uint)seq[i]),max_v); + h=bits(max_v); +} + +wt_coder_binary::wt_coder_binary() {} + +wt_coder_binary::~wt_coder_binary() {} + +bool wt_coder_binary::is_set(uint symbol, uint l) { + if((1<<(h-l-1))&symbol) return true; + return false; +} + +bool wt_coder_binary::done(uint symbol, uint l) { + if(l==h) return true; + return false; +} + +uint wt_coder_binary::size() { + return sizeof(wt_coder_binary); +} + +uint wt_coder_binary::save(FILE *fp) { + uint wr = WT_CODER_BINARY_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&h,sizeof(uint),1,fp); + return wr-2; +} + +wt_coder_binary * wt_coder_binary::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=WT_CODER_BINARY_HDR) return NULL; + wt_coder_binary * ret = new wt_coder_binary(); + if(fread(&ret->h,sizeof(uint),1,fp)!=1) { + delete ret; + return NULL; + } + return ret; +} diff --git a/src/static_sequence/wt_coder_binary.h b/src/static_sequence/wt_coder_binary.h new file mode 100644 index 0000000..1df2cbe --- /dev/null +++ b/src/static_sequence/wt_coder_binary.h @@ -0,0 +1,54 @@ +/* wt_coder_binary.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ *
+ * wt_coder_binary definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+
+#ifndef wt_coder_binary_h
+#define wt_coder_binary_h
+
+/* NOTE(review): the header names of the #include lines below are missing
+ * from this copy of the patch - restore them from the repository. */
+#include
+#include
+#include
+
+/** Considers the binary representation of the symbols as the code
+ *
+ * @author Francisco Claude
+ */
+class wt_coder_binary: public wt_coder {
+  public:
+    /** Builds a wt_coder_binary using the sequence of length n and the alphabet_mapper
+     * to determine the length of the binary codes */
+    wt_coder_binary(uint * seq, uint n, alphabet_mapper * am);
+    /** Same as above for a sequence of bytes */
+    wt_coder_binary(uchar * seq, uint n, alphabet_mapper * am);
+    virtual ~wt_coder_binary();
+    virtual bool is_set(uint symbol, uint l);
+    virtual bool done(uint symbol, uint l);
+    /** Returns the code length h */
+    virtual uint depth() { return h; }
+    virtual uint size();
+    virtual uint save(FILE *fp);
+    /** Loads a coder written by save(), returns NULL in case of error */
+    static wt_coder_binary * load(FILE *fp);
+
+  protected:
+    /** Empty constructor, used by load() */
+    wt_coder_binary();
+    /** Length in bits of every code */
+    uint h;
+};
+
+#endif
+
diff --git a/src/static_sequence/wt_coder_huff.cpp b/src/static_sequence/wt_coder_huff.cpp
new file mode 100644
index 0000000..5ba633b
--- /dev/null
+++ b/src/static_sequence/wt_coder_huff.cpp
@@ -0,0 +1,89 @@
+/* wt_coder_huff.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ * + * wt_coder_huff definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +wt_coder_huff::wt_coder_huff(uint * symbs, uint n, alphabet_mapper * am) { + for(uint i=0;imap(symbs[i]); + hc = new huffman_codes(symbs, n); + buffer = new uint[hc->max_length()/W+1]; + s_len = 0; last_symbol = (uint)-1; + for(uint i=0;iunmap(symbs[i]); +} + +wt_coder_huff::wt_coder_huff(uchar * symbs, uint n, alphabet_mapper * am) { + for(uint i=0;imap((uint)symbs[i]); + hc = new huffman_codes(symbs, n); + buffer = new uint[hc->max_length()/W+1]; + s_len = 0; last_symbol = (uint)-1; + for(uint i=0;iunmap((uint)symbs[i]); +} + +wt_coder_huff::wt_coder_huff() {} + +wt_coder_huff::~wt_coder_huff() { + delete hc; + delete [] buffer; +} + +bool wt_coder_huff::is_set(uint symbol, uint l) { + if(symbol!=last_symbol) { + s_len = (uint)hc->encode(symbol, buffer, (ulong)0); + last_symbol = symbol; + } + return bitget(buffer,l); +} + +bool wt_coder_huff::done(uint symbol, uint l) { + if(symbol!=last_symbol) { + s_len = (uint)hc->encode(symbol, buffer, (ulong)0); + last_symbol = symbol; + } + return l==s_len; +} + +uint wt_coder_huff::size() { + return 2*sizeof(uint)+sizeof(wt_coder_huff)+hc->size()+(hc->max_length()/W+1)*sizeof(uint); +} + +uint wt_coder_huff::save(FILE * fp) { + 
uint wr = WT_CODER_HUFF_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + if(wr!=1) return 1; + if(hc->save(fp)) return 1; + //if(am->save(fp)) return 1; + return 0; +} + +wt_coder_huff * wt_coder_huff::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=WT_CODER_HUFF_HDR) return NULL; + wt_coder_huff * ret = new wt_coder_huff(); + ret->hc = huffman_codes::load(fp); + ret->buffer = new uint[ret->hc->max_length()/W+1]; + ret->s_len = 0; ret->last_symbol = (uint)-1; + return ret; +} diff --git a/src/static_sequence/wt_coder_huff.h b/src/static_sequence/wt_coder_huff.h new file mode 100644 index 0000000..1811ff0 --- /dev/null +++ b/src/static_sequence/wt_coder_huff.h @@ -0,0 +1,55 @@ +/* wt_coder_huff.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * wt_coder_huff definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef wt_coder_huff_h +#define wt_coder_huff_h + +#include +#include +#include +#include + +/** Uses Huffman codes to determine the shape of the wavelet tree + * + * @author Francisco Claude + */ +class wt_coder_huff: public wt_coder { + public: + /** Builds a wt_coder_huff using the sequence symbs of length n and the alphabet_mapper am + * to determine the Huffman codes */ + wt_coder_huff(uint *symbs, uint n, alphabet_mapper * am); + wt_coder_huff(uchar *symbs, uint n, alphabet_mapper * am); + virtual ~wt_coder_huff(); + virtual bool is_set(uint symbol, uint l); + virtual bool done(uint symbol, uint l); + virtual uint size(); + virtual uint save(FILE *fp); + static wt_coder_huff * load(FILE *fp); + //uint * get_buffer(uint symbol, uint *n); + + protected: + wt_coder_huff(); + huffman_codes * hc; + uint * buffer; + uint last_symbol, s_len; +}; + +#endif diff --git a/src/static_sequence/wt_node.cpp b/src/static_sequence/wt_node.cpp new file mode 100644 index 0000000..5ab20ce --- /dev/null +++ b/src/static_sequence/wt_node.cpp @@ -0,0 +1,34 @@ +/* wt_node.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * wt_node + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +wt_node * wt_node::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd==WT_NODE_NULL_HDR) return NULL; + fseek(fp,-sizeof(uint),SEEK_CUR); + switch(rd) { + case WT_NODE_INTERNAL_HDR: return wt_node_internal::load(fp); + case WT_NODE_LEAF_HDR: return wt_node_leaf::load(fp); + } + return NULL; +} diff --git a/src/static_sequence/wt_node.h b/src/static_sequence/wt_node.h new file mode 100644 index 0000000..15ce49d --- /dev/null +++ b/src/static_sequence/wt_node.h @@ -0,0 +1,60 @@ +/* wt_node.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * wt_node + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef wt_node_h +#define wt_node_h + +#include +#include +#include + +#define WT_NODE_NULL_HDR 0 +#define WT_NODE_INTERNAL_HDR 2 +#define WT_NODE_LEAF_HDR 3 + +/** Base class for nodes in the wavelet tree + * + * @author Francisco Claude + */ +class wt_node { + public: + virtual ~wt_node() {} + virtual uint rank(uint symbol, uint pos, uint l, wt_coder * c)=0; + virtual uint rankLessThan(uint &symbol, uint pos) = 0; + virtual uint select(uint symbol, uint pos, uint l, wt_coder * c)=0; + virtual uint access(uint pos)=0; + virtual uint access(uint pos, uint &rank) + { + assert(0); // Fallback only; wt_node_internal and wt_node_leaf both override this + return -1; + } + virtual void access(std::vector &result, uint i, uint j, uint min, uint max, uint l, uint pivot)=0; + virtual void access(std::vector &result, uint i, uint j)=0; + virtual uint access(uint i, uint j, uint min, uint max, uint l, uint pivot)=0; + virtual uint size()=0; + virtual uint save(FILE *fp)=0; + static wt_node * load(FILE *fp); +}; + +#include +#include + +#endif diff --git a/src/static_sequence/wt_node_internal.cpp b/src/static_sequence/wt_node_internal.cpp new file mode 100644 index 0000000..0232a73 --- /dev/null +++ b/src/static_sequence/wt_node_internal.cpp @@ -0,0 +1,370 @@ +/* wt_node_internal.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * wt_node_internal + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +wt_node_internal::wt_node_internal(uint * symbols, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb) { + uint * ibitmap = new uint[n/W+1]; + for(uint i=0;iis_set(symbols[i],l)) + bitset(ibitmap,i); + bitmap = bmb->build(ibitmap, n); + delete [] ibitmap; + uint count_right = bitmap->rank1(n-1); + uint count_left = n-count_right+1; + uint * left = new uint[count_left+1]; + uint * right = new uint[count_right+1]; + count_right = count_left = 0; + bool match_left = true, match_right = true; + for(uint i=0;iaccess(i)) { + right[count_right++]=symbols[i]; + if(count_right>1) + if(right[count_right-1]!=right[count_right-2]) + match_right = false; + } + else { + left[count_left++]=symbols[i]; + if(count_left>1) + if(left[count_left-1]!=left[count_left-2]) + match_left = false; + } + } + if(count_left>0) { + if(match_left/* && c->done(left[0],l+1)*/) + left_child = new wt_node_leaf(left[0], count_left); + else + left_child = new wt_node_internal(left, count_left, l+1, c, bmb); + } else { + left_child = NULL; + } + if(count_right>0) { + if(match_right/* && c->done(right[0],l+1)*/) + right_child = new wt_node_leaf(right[0], count_right); + else + right_child = new wt_node_internal(right, count_right, l+1, c, bmb); + } else { + right_child = NULL; + } + delete [] left; + delete [] right; +} + +wt_node_internal::wt_node_internal(uchar * symbols, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb, uint left, uint *done) { + uint * ibitmap = new uint[n/W+1]; 
+ for(uint i=0;iis_set((uint)symbols[i + left],l)) + bitset(ibitmap,i); + bitmap = bmb->build(ibitmap, n); + delete [] ibitmap; + + uint count_right = bitmap->rank1(n-1); + uint count_left = n-count_right; + + for (uint i=0;iis_set(swap,l)) + j = bitmap->rank0(k)-1; + else + j = count_left + bitmap->rank1(k)-1; + uchar temp = symbols[j+left]; + symbols[j+left] = swap; + swap = temp; + set_field(done,1,k+left,1); + } + + while (get_field(done,1,i+left)) + ++i; + } + + bool match_left = true, match_right = true; + for (uint i=1; i < count_left; i++) + if (symbols[i+left] != symbols[i+left-1]) + match_left = false; + for (uint i=count_left + 1; i < n; i++) + if (symbols[i+left] != symbols[i+left-1]) + match_right = false; + + + if(count_left>0) { + if(match_left/* && c->done(left[0],l+1)*/) + left_child = new wt_node_leaf((uint)symbols[left], count_left); + else + left_child = new wt_node_internal(symbols, count_left, l+1, c, bmb, left, done); + } else { + left_child = NULL; + } + if(count_right>0) { + if(match_right/* && c->done(right[0],l+1)*/) + right_child = new wt_node_leaf((uint)symbols[left+count_left], count_right); + else + right_child = new wt_node_internal(symbols, count_right, l+1, c, bmb, left+count_left, done); + } else { + right_child = NULL; + } +} + + +wt_node_internal::wt_node_internal() { } + +wt_node_internal::~wt_node_internal() { + delete bitmap; + if(right_child!=NULL) delete right_child; + if(left_child!=NULL) delete left_child; +} + +uint wt_node_internal::rank(uint symbol, uint pos, uint l, wt_coder * c) { + bool is_set = c->is_set(symbol,l); + if(!is_set) { + if(left_child==NULL) return 0; + return left_child->rank(symbol, bitmap->rank0(pos)-1,l+1,c); + } + else { + if(right_child==NULL) return 0; + return right_child->rank(symbol, bitmap->rank1(pos)-1,l+1,c); + } +} + +// return value is rank of symbol (less or equal to the given symbol) that has rank > 0, +// the parameter symbol is updated accordinly +uint 
wt_node_internal::rankLessThan(uint &symbol, uint pos) +{ + uint result = -1; + using std::cout; + using std::endl; +// cout << "pos = " << pos << ", symbol = " << symbol << endl; + + if (pos == (uint)-1) + return (uint)-1; + if(right_child!=NULL) + result = right_child->rankLessThan(symbol, bitmap->rank1(pos)-1); + if(result == (uint)-1 && left_child!=NULL) + return left_child->rankLessThan(symbol, bitmap->rank0(pos)-1); + return result; +} + + +uint wt_node_internal::select(uint symbol, uint pos, uint l, wt_coder * c) { + bool is_set = c->is_set(symbol, l); + uint ret = 0; + if(!is_set) { + if(left_child==NULL) + return (uint)(-1); + uint new_pos = left_child->select(symbol, pos, l+1,c); + if(new_pos+1==0) return (uint)(-1); + ret = bitmap->select0(new_pos)+1; + } else { + if(right_child==NULL) + return (uint)(-1); + uint new_pos = right_child->select(symbol, pos, l+1,c); + if(new_pos+1==0) return (uint)(-1); + ret = bitmap->select1(new_pos)+1; + } + if(ret==0) return (uint)-1; + return ret; +} + +uint wt_node_internal::access(uint pos) { + bool is_set = bitmap->access(pos); + if(!is_set) { + assert(left_child!=NULL); + return left_child->access(bitmap->rank0(pos)-1); + } else { + assert(right_child!=NULL); + return right_child->access(bitmap->rank1(pos)-1); + } +} + +// Returns the value at given position and its rank +uint wt_node_internal::access(uint pos, uint &rank) +{ + bool is_set = bitmap->access(pos); + if(!is_set) + { + // recurse left + pos = bitmap->rank0(pos)-1; + return left_child->access(pos, rank); + } + else + { + // recurse right + pos = bitmap->rank1(pos)-1; + return right_child->access(pos, rank); + } +} + + +void wt_node_internal::access(vector &result, uint i, uint j, uint min, uint max, uint l, uint pivot) +{ + uint symbol = pivot | (1 << l); +// std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << min << ", " << max << "], symbol = " << symbol << std::endl; + + if (j < i || max < min) + return; + + if (min < symbol) + { + // 
Recurse left + uint newi = 0; + if (i > 0) + newi = bitmap->rank0(i - 1); + uint newj = bitmap->rank0(j); + + uint newmax = max < symbol - 1 ? max : symbol - 1; + if (left_child != NULL && newj > 0) + left_child->access(result, newi, newj-1, min, newmax, l-1, pivot); + } + + if (max >= symbol) + { + // Recurse right + uint newi = 0; + if (i > 0) + newi = bitmap->rank1(i - 1); + uint newj = bitmap->rank1(j); + + uint newmin = min > symbol ? min : symbol; + if (right_child != NULL && newj > 0) + right_child->access(result, newi, newj-1, newmin, max, l-1, symbol); + } +} + +void wt_node_internal::access(vector &result, uint i, uint j) +{ +// std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << min << ", " << max << "], symbol = " << symbol << std::endl; + + if (j < i) + return; + + { + // Recurse left + uint newi = 0; + if (i > 0) + newi = bitmap->rank0(i - 1); + uint newj = bitmap->rank0(j); + + if (left_child != NULL && newj > 0) + left_child->access(result, newi, newj-1); + } + + { + // Recurse right + uint newi = 0; + if (i > 0) + newi = bitmap->rank1(i - 1); + uint newj = bitmap->rank1(j); + + if (right_child != NULL && newj > 0) + right_child->access(result, newi, newj-1); + } +} + +// Count +uint wt_node_internal::access(uint i, uint j, uint min, uint max, uint l, uint pivot) +{ + uint count = 0; + uint symbol = pivot | (1 << l); +// std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << min << ", " << max << "], symbol = " << symbol << std::endl; + + if (j < i || max < min) + return 0; + + if (min < symbol) + { + // Recurse left + uint newi = 0; + if (i > 0) + newi = bitmap->rank0(i - 1); + uint newj = bitmap->rank0(j); + + uint newmax = max < symbol - 1 ? 
max : symbol - 1; + if (left_child != NULL && newj > 0) + count += left_child->access(newi, newj-1, min, newmax, l-1, pivot); + } + + if (max >= symbol) + { + // Recurse right + uint newi = 0; + if (i > 0) + newi = bitmap->rank1(i - 1); + uint newj = bitmap->rank1(j); + + uint newmin = min > symbol ? min : symbol; + if (right_child != NULL && newj > 0) + count += right_child->access(newi, newj-1, newmin, max, l-1, symbol); + } + return count; +} + + +uint wt_node_internal::size() { + uint s = bitmap->size()+sizeof(wt_node_internal); + if(left_child!=NULL) + s += left_child->size(); + if(right_child!=NULL) + s += right_child->size(); + return s; +} + +uint wt_node_internal::save(FILE *fp) { + uint wr = WT_NODE_INTERNAL_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + if(wr!=1) return 1; + if(bitmap->save(fp)) return 1; + if(left_child!=NULL) { + if(left_child->save(fp)) return 1; + } else { + wr = WT_NODE_NULL_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + if(wr!=1) return 1; + } + if(right_child!=NULL) { + if(right_child->save(fp)) return 1; + } else { + wr = WT_NODE_NULL_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + if(wr!=1) return 1; + } + return 0; +} + +wt_node_internal * wt_node_internal::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=WT_NODE_INTERNAL_HDR) return NULL; + wt_node_internal * ret = new wt_node_internal(); + ret->bitmap = static_bitsequence::load(fp); + ret->left_child = wt_node::load(fp); + ret->right_child = wt_node::load(fp); + return ret; +} diff --git a/src/static_sequence/wt_node_internal.h b/src/static_sequence/wt_node_internal.h new file mode 100644 index 0000000..f649de4 --- /dev/null +++ b/src/static_sequence/wt_node_internal.h @@ -0,0 +1,63 @@ +/* wt_node_internal.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * wt_node_internal + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef wt_node_internal_h +#define wt_node_internal_h + +#include +#include +#include +#include +#include +#include +#include +using std::vector; + +/** Class for representing internal nodes + * + * @author Francisco Claude + */ +class wt_node_internal: public wt_node { + public: + wt_node_internal(uint * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb); + wt_node_internal(uchar * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb, uint, uint *); + virtual ~wt_node_internal(); + virtual uint rank(uint symbol, uint pos, uint level, wt_coder * c); + virtual uint rankLessThan(uint &symbol, uint pos); + virtual uint select(uint symbol, uint pos, uint level, wt_coder * c); + virtual uint access(uint pos); + virtual uint access(uint pos, uint &rank); + virtual void access(vector &result, uint i, uint j, uint min, uint max, uint l, uint pivot); + virtual void access(vector &result, uint i, uint j); + virtual uint access(uint i, uint j, uint min, uint max, uint l, uint pivot); + virtual uint size(); + virtual uint save(FILE *fp); + static wt_node_internal * load(FILE *fp); + + + protected: + wt_node_internal(); + wt_node *left_child, *right_child; + 
static_bitsequence * bitmap; + //uint length; +}; + +#endif diff --git a/src/static_sequence/wt_node_leaf.cpp b/src/static_sequence/wt_node_leaf.cpp new file mode 100644 index 0000000..9d15193 --- /dev/null +++ b/src/static_sequence/wt_node_leaf.cpp @@ -0,0 +1,111 @@ +/* wt_node_leaf.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * wt_node_leaf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +wt_node_leaf::wt_node_leaf(uint symbol, uint count) { + this->symbol = symbol; + this->count = count; +} + +wt_node_leaf::wt_node_leaf() {} + +wt_node_leaf::~wt_node_leaf() {} + +uint wt_node_leaf::rank(uint symbol, uint pos, uint l, wt_coder * c) { + if(symbol!=this->symbol) return 0; + pos++; + return pos; +} + +uint wt_node_leaf::rankLessThan(uint &symbol, uint pos) { +// std::cout <<"this-symbol: " << (uchar)this->symbol << ", symbol = " << (uchar)symbol << ", pos = " << pos << std::endl; + if (pos == (uint)-1 || symbol < this->symbol) + return -1; + symbol = this->symbol; + pos++; + return pos; +} + +uint wt_node_leaf::select(uint symbol, uint pos, uint l, wt_coder * c) { + if(symbol!=this->symbol) return (uint)-1; + if(pos==0 || pos>count) return (uint)-1; + return pos; +} + +uint wt_node_leaf::access(uint pos) { +// std::cout 
<<"this-symbol: " << (uchar)this->symbol << ", pos = " << pos << std::endl; + + return symbol; +} + +uint wt_node_leaf::access(uint pos, uint &rank) { + rank = pos+1; + return symbol; +} + +void wt_node_leaf::access(vector &result, uint i, uint j, uint min, uint max, uint l, uint pivot) +{ +// std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << min << ", " << max << "], symbol = " << symbol << std::endl; + + if (i <= j && symbol >= min && symbol <= max) + result.push_back((int)symbol); +} + +void wt_node_leaf::access(vector &result, uint i, uint j) +{ +// std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << min << ", " << max << "], symbol = " << symbol << std::endl; + + if (i <= j) + result.push_back((int)symbol); +} + +uint wt_node_leaf::access(uint i, uint j, uint min, uint max, uint l, uint pivot) +{ +// std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << min << ", " << max << "], symbol = " << symbol << std::endl; + + if (i <= j && symbol >= min && symbol <= max) + return 1; + return 0; +} + +uint wt_node_leaf::size() { + return sizeof(wt_node_leaf); +} + +uint wt_node_leaf::save(FILE *fp) { + uint wr = WT_NODE_LEAF_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&count,sizeof(uint),1,fp); + wr += fwrite(&symbol,sizeof(uint),1,fp); + return wr-3; +} + +wt_node_leaf * wt_node_leaf::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=WT_NODE_LEAF_HDR) return NULL; + wt_node_leaf * ret = new wt_node_leaf(); + rd = fread(&(ret->count),sizeof(uint),1,fp); + rd += fread(&(ret->symbol),sizeof(uint),1,fp); + if(rd!=2) { delete ret; return NULL; } + return ret; +} diff --git a/src/static_sequence/wt_node_leaf.h b/src/static_sequence/wt_node_leaf.h new file mode 100644 index 0000000..3d5aac9 --- /dev/null +++ b/src/static_sequence/wt_node_leaf.h @@ -0,0 +1,57 @@ +/* wt_node_leaf.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * wt_node_leaf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef wt_node_leaf_h +#define wt_node_leaf_h + +#include +#include +#include +#include +#include + +/** Class for representing leaves of the wavelet tree. + * + * @author Francisco Claude + */ +class wt_node_leaf: public wt_node { + public: + wt_node_leaf(uint symbol, uint count); + virtual ~wt_node_leaf(); + virtual uint rank(uint symbol, uint pos, uint l, wt_coder * c); + virtual uint rankLessThan(uint &symbol, uint pos); + virtual uint select(uint symbol, uint pos, uint l, wt_coder * c); + virtual uint access(uint pos); + virtual uint access(uint pos, uint &rank); + virtual void access(std::vector &result, uint i, uint j, uint min, uint max, uint l, uint pivot); + virtual void access(std::vector &result, uint i, uint j); + virtual uint access(uint i, uint j, uint min, uint max, uint l, uint pivot); + virtual uint size(); + virtual uint save(FILE *fp); + static wt_node_leaf * load(FILE *fp); + + protected: + wt_node_leaf(); + uint symbol; + uint count; +}; + +#endif diff --git a/src/utils/alphabet_mapper.cpp b/src/utils/alphabet_mapper.cpp new file mode 100644 index 0000000..0d35530 --- /dev/null +++ b/src/utils/alphabet_mapper.cpp @@ -0,0 +1,47 @@ +/* alphabet_mapper.cpp + * Copyright (C) 
2008, Francisco Claude, all rights reserved. + * + * alphabet_mapper definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +alphabet_mapper::alphabet_mapper() { + user_count=0; +} + +void alphabet_mapper::use() { + user_count++; +} + +void alphabet_mapper::unuse() { + user_count--; + if(user_count==0) + delete this; +} + +alphabet_mapper * alphabet_mapper::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + fseek(fp,-1*sizeof(uint),SEEK_CUR); + switch(rd) { + case ALPHABET_MAPPER_NONE_HDR: return alphabet_mapper_none::load(fp); + case ALPHABET_MAPPER_CONT_HDR: return alphabet_mapper_cont::load(fp); + } + return NULL; +} diff --git a/src/utils/alphabet_mapper.h b/src/utils/alphabet_mapper.h new file mode 100644 index 0000000..9867674 --- /dev/null +++ b/src/utils/alphabet_mapper.h @@ -0,0 +1,60 @@ +/* alphabet_mapper.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * alphabet_mapper definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _ALPHABET_MAPPER_H +#define _ALPHABET_MAPPER_H + +#include +#include + +#define ALPHABET_MAPPER_NONE_HDR 2 +#define ALPHABET_MAPPER_CONT_HDR 3 + +//using namespace std; + +/** Base class for alphabet mappers + * + * @author Francisco Claude + */ +class alphabet_mapper { + public: + alphabet_mapper(); + virtual ~alphabet_mapper() {} + /** Maps the symbol */ + virtual uint map(uint s)=0; + /** Unmaps the symbol */ + virtual uint unmap(uint s)=0; + /** Returns the size of the mapper */ + virtual uint size()=0; + /** Saves the mapper to a file */ + virtual uint save(FILE *fp)=0; + /** Loads the mapper from a file */ + static alphabet_mapper * load(FILE * fp); + virtual void use(); + virtual void unuse(); + protected: + uint user_count; +}; + +#include +#include + +#endif /* _ALPHABET_MAPPER_H */ diff --git a/src/utils/alphabet_mapper_cont.cpp b/src/utils/alphabet_mapper_cont.cpp new file mode 100644 index 0000000..776899b --- /dev/null +++ b/src/utils/alphabet_mapper_cont.cpp @@ -0,0 +1,77 @@ +/* alphabet_mapper_cont.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * alphabet_mapper_cont definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +using std::max; +alphabet_mapper_cont::alphabet_mapper_cont(uint * seq, uint n, static_bitsequence_builder *bmb) { + uint max_v = 0; + for(uint i=0;ibuild(bmap,max_v); + delete [] bmap; +} + +alphabet_mapper_cont::alphabet_mapper_cont() { +} + +alphabet_mapper_cont::~alphabet_mapper_cont() { + delete m; +} + +uint alphabet_mapper_cont::map(uint s) { + return m->rank1(s); +} + +uint alphabet_mapper_cont::unmap(uint s) { + return m->select1(s); +} + +uint alphabet_mapper_cont::size() { + return sizeof(alphabet_mapper_cont)+m->size(); +} + +uint alphabet_mapper_cont::save(FILE *fp) { + uint wr = ALPHABET_MAPPER_CONT_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + if(wr!=1) return 1; + if(m->save(fp)) return 1; + return 0; +} + +alphabet_mapper_cont * alphabet_mapper_cont::load(FILE * fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=ALPHABET_MAPPER_CONT_HDR) return NULL; + alphabet_mapper_cont * ret = new alphabet_mapper_cont(); + ret->m = static_bitsequence::load(fp); + if(ret->m==NULL) { + delete ret; + return NULL; + } + return ret; +} diff --git a/src/utils/alphabet_mapper_cont.h b/src/utils/alphabet_mapper_cont.h new file mode 100644 index 0000000..97302de --- /dev/null +++ b/src/utils/alphabet_mapper_cont.h @@ -0,0 +1,52 @@ +/* alphabet_mapper_cont.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * alphabet_mapper_cont definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _ALPHABET_MAPPER_CONT_H +#define _ALPHABET_MAPPER_CONT_H + +#include +#include +#include +#include +#include + +//using namespace std; + +/** Mapper backed by a bitmap m: map(s) is m->rank1(s) and unmap(s) is m->select1(s) + * + * @author Francisco Claude + */ +class alphabet_mapper_cont : public alphabet_mapper { + public: + alphabet_mapper_cont(uint * seq, uint n, static_bitsequence_builder *bmb); + virtual ~alphabet_mapper_cont(); + virtual uint map(uint s); + virtual uint unmap(uint s); + virtual uint size(); + virtual uint save(FILE *fp); + static alphabet_mapper_cont * load(FILE *fp); + + protected: + alphabet_mapper_cont(); + static_bitsequence * m; +}; + +#endif /* _ALPHABET_MAPPER_CONT_H */ diff --git a/src/utils/alphabet_mapper_none.cpp b/src/utils/alphabet_mapper_none.cpp new file mode 100644 index 0000000..1e545f0 --- /dev/null +++ b/src/utils/alphabet_mapper_none.cpp @@ -0,0 +1,44 @@ +/* alphabet_mapper_none.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * alphabet_mapper definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +alphabet_mapper_none::alphabet_mapper_none() { } + +uint alphabet_mapper_none::map(uint s) {return s;} + +uint alphabet_mapper_none::unmap(uint s) {return s;} + +uint alphabet_mapper_none::size() { return sizeof(alphabet_mapper_none); } + +uint alphabet_mapper_none::save(FILE *fp) { + uint wr = ALPHABET_MAPPER_NONE_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + if(wr!=1) return 1; + return 0; +} + +alphabet_mapper_none * alphabet_mapper_none::load(FILE * fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=ALPHABET_MAPPER_NONE_HDR) return NULL; + return new alphabet_mapper_none(); +} diff --git a/src/utils/alphabet_mapper_none.h b/src/utils/alphabet_mapper_none.h new file mode 100644 index 0000000..5484451 --- /dev/null +++ b/src/utils/alphabet_mapper_none.h @@ -0,0 +1,46 @@ +/* alphabet_mapper_none.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * alphabet_mapper_none definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _ALPHABET_MAPPER_NONE_H +#define _ALPHABET_MAPPER_NONE_H + +#include +#include +#include + +//using namespace std; + +/** Mapper that doesn't change the value (identity) + * + * @author Francisco Claude + */ +class alphabet_mapper_none : public alphabet_mapper { + public: + alphabet_mapper_none(); + virtual ~alphabet_mapper_none() {} + virtual uint map(uint s); + virtual uint unmap(uint s); + virtual uint size(); + virtual uint save(FILE *fp); + static alphabet_mapper_none * load(FILE *fp); +}; + +#endif /* _ALPHABET_MAPPER_NONE_H */ diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 0000000..b1ac75b --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,51 @@ +CPP=g++ + +#CPPFLAGS=-g3 -Wall -I../includes/ +CPPFLAGS=-O9 -Wall -DNDEBUG -I../includes/ + +OBJECTS=make_bitmap.o static_bitsequence_tester.o static_sequence_tester.o static_sequence_wvtree_test.o static_sequence_gmr_test.o static_sequence_gmr_chunk_test.o static_sequence_wvtree_noptrs_test.o static_bitsequence_test.o static_sequence_bs_test.o text_to_int.o +BIN=make_bitmap static_sequence_wvtree_test static_sequence_gmr_test static_sequence_gmr_chunk_test static_sequence_wvtree_noptrs_test 
static_bitsequence_test text_to_int static_sequence_bs_test + +LIB=../lib/libcds.a + +%.o: %.cpp + @echo " [C++] Compiling $<" + @$(CPP) $(CPPFLAGS) -c $< -o $@ + +all: $(OBJECTS) $(BIN) + +static_bitsequence_test: + @echo " [C++] Building static_bitsequence_test" + @$(CPP) $(CPPFLAGS) -o static_bitsequence_test static_bitsequence_test.o static_bitsequence_tester.o $(LIB) + +make_bitmap: + @echo " [C++] Building make_bitmap" + @$(CPP) $(CPPFLAGS) -o make_bitmap make_bitmap.o $(LIB) + +text_to_int: + @echo " [C++] Building text_to_int" + @$(CPP) $(CPPFLAGS) -o text_to_int text_to_int.o $(LIB) + +static_sequence_wvtree_test: + @echo " [C++] Building static_sequence_wvtree_test" + @$(CPP) $(CPPFLAGS) -o static_sequence_wvtree_test static_sequence_wvtree_test.o static_sequence_tester.o $(LIB) + +static_sequence_gmr_test: + @echo " [C++] Building static_sequence_gmr_test" + @$(CPP) $(CPPFLAGS) -o static_sequence_gmr_test static_sequence_gmr_test.o static_sequence_tester.o $(LIB) + +static_sequence_wvtree_noptrs_test: + @echo " [C++] Building static_sequence_wvtree_noptrs_test" + @$(CPP) $(CPPFLAGS) -o static_sequence_wvtree_noptrs_test static_sequence_wvtree_noptrs_test.o static_sequence_tester.o $(LIB) + +static_sequence_gmr_chunk_test: + @echo " [C++] Building static_sequence_gmr_chunk_test" + @$(CPP) $(CPPFLAGS) -o static_sequence_gmr_chunk_test static_sequence_gmr_chunk_test.o static_sequence_tester.o $(LIB) + +static_sequence_bs_test: + @echo " [C++] Building static_sequence_bs_test" + @$(CPP) $(CPPFLAGS) -o static_sequence_bs_test static_sequence_bs_test.o static_sequence_tester.o $(LIB) + +clean: + @echo " [CLN] Cleaning object files" + @rm -f $(OBJECTS) $(BIN) diff --git a/tests/make_bitmap.cpp b/tests/make_bitmap.cpp new file mode 100644 index 0000000..11d2f28 --- /dev/null +++ b/tests/make_bitmap.cpp @@ -0,0 +1,57 @@ +/* make_bitmap.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * make_bitmap + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include + +using namespace std; + +int main(int argc, char ** argv) { + if(argc!=4) { + cout << "usage: " << argv[0] << " " << endl; + return 0; + } + char * fname = argv[1]; + uint len = atoi(argv[2]); + uint ones = atoi(argv[3]); + uint * bm = new uint[uint_len(len,1)]; + for(uint i=0;i +#include +#include +#include +#include "static_bitsequence_tester.h" + +using namespace std; + +int main(int argc, char ** argv) { + if(argc!=5) { + cout << "usage: " << argv[0] << " " << endl; + return 0; + } + FILE * fp = fopen(argv[1],"r"); + if(fp==NULL) { + cout << "Error opening " << argv[1] << endl; + return -1; + } + uint *bitseq, len;//, ones; + uint l=fread(&len, sizeof(uint), 1, fp); + //l += fread(&ones,sizeof(uint),1,fp); + bitseq = new uint[uint_len(len,1)]; + l+=fread(bitseq, sizeof(uint), uint_len(len,1), fp); + fclose(fp); + + uint sample_rate; + stringstream ss(argv[3]); + ss >> sample_rate; + + static_bitsequence * bs; + + if(string(argv[2])==string("r")) bs = new static_bitsequence_rrr02(bitseq,len,sample_rate); + if(string(argv[2])==string("s")) bs = new static_bitsequence_sdarray(bitseq,len); + else bs = new 
static_bitsequence_brw32(bitseq,len,sample_rate); + + cout << "Size: " << bs->size() << endl; + cout << "bpb = " << bs->size()*8./len << endl; + + /*for(uint kk=0;kk<30;kk++) + cout << bs->access(kk); + cout << endl;*/ + + /*for(uint kk=0;kk<20;kk++) { + bs->select_next1(kk); + }*/ + + if(string(argv[4])==string("t")) + test_bitsequence(bitseq,len,bs); + cout << "******************************************" << endl; + speed_access(bs, bitseq, len); + cout << "******************************************" << endl; + speed_rank0(bs, bitseq, len); + cout << "******************************************" << endl; + speed_rank1(bs, bitseq, len); + cout << "******************************************" << endl; + speed_select0(bs, bitseq, len); + cout << "******************************************" << endl; + speed_select1(bs, bitseq, len); + cout << "******************************************" << endl; + speed_selectnext1(bs, bitseq, len); +} diff --git a/tests/static_bitsequence_tester.cpp b/tests/static_bitsequence_tester.cpp new file mode 100644 index 0000000..509107d --- /dev/null +++ b/tests/static_bitsequence_tester.cpp @@ -0,0 +1,209 @@ +/* static_bitsequence_tester.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_tester + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include + +using namespace std; + +/* Time measuring */ +double ticks= (double)sysconf(_SC_CLK_TCK); +struct tms t1,t2; + +void start_clock() { + times (&t1); +} + + +double stop_clock() { + times (&t2); + return (t2.tms_utime-t1.tms_utime)/ticks; +} +/* end Time measuring */ + +uint NQUERIES=10000000; +uint SEED=47; + +void load(char *fname, uint ** text, uint * n) { + FILE * fp = fopen(fname,"r"); + if(fp==NULL) { + cout << "could not open " << fname << endl; + return; + } + if(fread(n,sizeof(uint),1,fp)!=1) { + cout << "Error reading file " << fname << endl; + return; + } + *text = new uint[uint_len(*n,1)]; + + if(fread(*text,sizeof(uint),uint_len(*n,1),fp)!=uint_len(*n,1)) { + cout << "Error reading file " << fname << endl; + return; + } +} + +void test_bitsequence(uint * bitseq, uint len, static_bitsequence * bs) { + uint ones = 0; + uint last_one = 0; + bool error = false; + for(uint i=0;ilength()/10))==0) { cout << endl; cout.flush(); } + } + if(bitget(bitseq,i)) { + for(uint k=last_one; !error && kselect_next1(k)!=i) { + uint ans= bs->select_next1(k); + cout << "Error select_next1" << endl; + cout << " got: (k=" << k << ") " << ans << " expected: " << i << endl; + cout << " rank(" << k << ")=" << bs->rank1(k) << " access(" << k << ")=" << bs->access(k) << endl; + cout << " rank(" << ans << ")=" << bs->rank1(ans) << " access(" << ans << ")=" << bs->access(ans) << endl; + error = true; + } + } + last_one = i; + ones++; + } + if(bs->access(i) != (bitget(bitseq,i)!=0)) { + cout << "Access error for position " << i << endl; + cout << " got: " << bs->access(i) << " expected: " << (bitget(bitseq,i)!=0) << endl; + error = true; + } + if(bs->rank1(i) != ones) { + cout << "Rank1 error for 
position " << i << endl; + cout << " got: " << bs->rank1(i) << " expected: " << ones << endl; + error = true; + } + if(bitget(bitseq,i) && bs->select1(ones) != i) { + cout << "Select1 error for position " << i << " ones:" << ones << endl; + cout << " got: " << bs->select1(ones) << " expected: " << i << endl; + error = true; + } + if(bs->rank0(i) != i+1-ones) { + cout << "Rank0 error for position " << i << endl; + cout << " got: " << bs->rank0(i) << " expected: " << ones << endl; + error = true; + } + if(!bitget(bitseq,i) && bs->select0(i+1-ones) != i) { + cout << "Select0 error for position " << i << endl; + cout << " got: " << bs->select0(i+1-ones) << " expected: " << i << endl; + error = true; + } + } + cout << "." << endl; +} + +void speed_access(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;iaccess(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " accesses: " << t << " secs" << endl; + cout << " * Time per access: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} + +void speed_rank0(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;irank0(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " rank0s: " << t << " secs" << endl; + cout << " * Time per rank0: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} + +void speed_rank1(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;irank1(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " rank1s: " << t << " secs" << endl; + cout << " * Time per rank1: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} + +void speed_select0(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + uint ones=ss->rank0(n-1); + srand(SEED); + + 
start_clock(); + for(uint i=0;iselect0(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " select0s: " << t << " secs" << endl; + cout << " * Time per select0: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} + +void speed_select1(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + uint ones=ss->rank1(n-1); + srand(SEED); + + start_clock(); + for(uint i=0;iselect1(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " select1s: " << t << " secs" << endl; + cout << " * Time per select1: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} + +void speed_selectnext1(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;iselect_next1(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " select_next1s: " << t << " secs" << endl; + cout << " * Time per select_next1: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} diff --git a/tests/static_bitsequence_tester.h b/tests/static_bitsequence_tester.h new file mode 100644 index 0000000..c5de5cf --- /dev/null +++ b/tests/static_bitsequence_tester.h @@ -0,0 +1,44 @@ +/* static_bitsequence_tester.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_tester + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifndef STATIC_BITSEQUENCE_TESTER_H +#define STATIC_BITSEQUENCE_TESTER_H + +void load(char *fname, uint ** text, uint * n); +void test_bitsequence(uint * bitseq, uint len, static_bitsequence * bs); +void speed_access(static_bitsequence * ss, uint * bitseq, uint n); +void speed_rank0(static_bitsequence * ss, uint * bitseq, uint n); +void speed_rank1(static_bitsequence * ss, uint * bitseq, uint n); +void speed_select0(static_bitsequence * ss, uint * bitseq, uint n); +void speed_select1(static_bitsequence * ss, uint * bitseq, uint n); +void speed_selectnext1(static_bitsequence * ss, uint * bitseq, uint n); + +#endif diff --git a/tests/static_sequence_bs_test.cpp b/tests/static_sequence_bs_test.cpp new file mode 100644 index 0000000..ffe2e76 --- /dev/null +++ b/tests/static_sequence_bs_test.cpp @@ -0,0 +1,76 @@ +/* static_sequence_bs_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_bs_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=5) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + stringstream ss; + ss << argv[3]; + uint samp; + ss >> samp; + + uint * text; + uint n; + load(argv[1],&text,&n); + + alphabet_mapper * am = new alphabet_mapper_none(); + + static_bitsequence_builder * bmb; + if(string(argv[2])==string("b")) + bmb = new static_bitsequence_builder_brw32(samp); + else if(string(argv[2])==string("s")) + bmb = new static_bitsequence_builder_sdarray(); + else + bmb = new static_bitsequence_builder_rrr02(samp); + + static_sequence * sseq = new static_sequence_bs(text,n,am,bmb); + delete bmb; + //am->unuse(); + + sseq = savetest(argv[1], sseq); + if(string(argv[4])==string("t")) + test_static_sequence(text,n,sseq); + else + cout << "Size: " << sseq->size() << endl; + cout << "*************************************" << endl; + speed_access(sseq,text,n); + cout << "*************************************" << endl; + speed_rank(sseq,text,n); + cout << "*************************************" << endl; + speed_select(sseq,text,n); + + delete sseq; + delete [] text; +} + diff --git a/tests/static_sequence_gmr_chunk_test.cpp b/tests/static_sequence_gmr_chunk_test.cpp new file mode 100644 index 0000000..1cb656c --- /dev/null +++ b/tests/static_sequence_gmr_chunk_test.cpp @@ -0,0 +1,80 @@ +/* static_sequence_gmr_chunk_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * static_sequence_gmr_chunk_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=6) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + stringstream ss; + ss << argv[3]; + uint samp; + ss >> samp; + stringstream ss2; + ss2 << argv[4]; + uint perm_samp; + ss2 >> perm_samp; + + uint * text; + uint n; + load(argv[1],&text,&n); + + static_bitsequence_builder * bmb; + if(string(argv[2])==string("b")) + bmb = new static_bitsequence_builder_brw32(samp); + else + bmb = new static_bitsequence_builder_rrr02(samp); + + static_permutation_builder * spb = new static_permutation_builder_mrrr(perm_samp,bmb); + static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, spb); + static_sequence * sseq = ssb->build(text,n); + + delete bmb; + delete ssb; + delete spb; + + sseq = savetest(argv[1], sseq); + if(string(argv[5])==string("t")) + test_static_sequence(text,n,sseq); + else + cout << "Size: " << sseq->size() << endl; + cout << "*************************************" << endl; + speed_access(sseq,text,n); + cout << "*************************************" << endl; + 
speed_rank(sseq,text,n); + cout << "*************************************" << endl; + speed_select(sseq,text,n); + + delete sseq; + delete [] text; +} + diff --git a/tests/static_sequence_gmr_test.cpp b/tests/static_sequence_gmr_test.cpp new file mode 100644 index 0000000..26cb00b --- /dev/null +++ b/tests/static_sequence_gmr_test.cpp @@ -0,0 +1,95 @@ +/* static_sequence_gmr_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_gmr_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=8) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + stringstream ss; + ss << argv[4]; + uint samp; + ss >> samp; + stringstream ss2; + ss2 << argv[5]; + uint chunk_length; + ss2 >> chunk_length; + stringstream ss3; + ss3 << argv[6]; + uint perm_samp; + ss3 >> perm_samp; + + uint * text; + uint n; + load(argv[1],&text,&n); + + static_bitsequence_builder * bmb; + if(string(argv[2])==string("b")) + bmb = new static_bitsequence_builder_brw32(samp); + else + bmb = new static_bitsequence_builder_rrr02(samp); + + static_sequence_builder * ssb; + + if(string(argv[3])==string("w")) { + alphabet_mapper * am = new alphabet_mapper_cont(text,n,bmb); + ssb = new static_sequence_builder_wvtree_noptrs(bmb,am); + } else if(string(argv[3])==string("p")) { + alphabet_mapper * am = new alphabet_mapper_none(); + wt_coder * wc = new wt_coder_huff(text,n,am); + ssb = new static_sequence_builder_wvtree(wc,bmb,am); + } else { + ssb = new static_sequence_builder_gmr_chunk(bmb, new static_permutation_builder_mrrr(perm_samp,bmb)); + } + + static_sequence * sseq = new static_sequence_gmr(text,n,chunk_length,bmb,ssb); + + delete bmb; + delete ssb; + + sseq = savetest(argv[1], sseq); + if(string(argv[7])==string("t")) + test_static_sequence(text,n,sseq); + else + cout << "Size: " << sseq->size() << endl; + cout << "*************************************" << endl; + speed_access(sseq,text,n); + cout << "*************************************" << endl; + speed_rank(sseq,text,n); + cout << "*************************************" << endl; + speed_select(sseq,text,n); + + 
delete sseq; + delete [] text; +} + diff --git a/tests/static_sequence_tester.cpp b/tests/static_sequence_tester.cpp new file mode 100644 index 0000000..48906f1 --- /dev/null +++ b/tests/static_sequence_tester.cpp @@ -0,0 +1,241 @@ +/* static_sequence_tester.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_tester + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "static_sequence_tester.h" + +using namespace std; + +/* Time measuring */ +double ticks= (double)sysconf(_SC_CLK_TCK); +struct tms t1,t2; + +void start_clock() { + times (&t1); +} + + +double stop_clock() { + times (&t2); + return (t2.tms_utime-t1.tms_utime)/ticks; +} +/* end Time measuring */ + +uint NQUERIES=100000; +uint SEED=47; + +void test_static_sequence(uint * symbols, uint n, static_sequence * ss) { + cout << "Size: " << ss->size() << endl; + uint max_v=0; + for(uint i=0;iaccess(i); + uint r = /*occ[symbols[i]];/*/ss->rank(symbols[i],i); + uint s = /*i; /*/ss->select(symbols[i],occ[symbols[i]]); + uint rM1 = (i==0)?0:ss->rank(symbols[i],i-1); + if(r!=occ[symbols[i]]) { + cout << "Error in rank for symbol " << symbols[i] << " at position " << i << endl; + cout << "value: " << r << endl; + cout << "Expected: " << occ[symbols[i]] << endl; + error = 
true; + } + if(s!=i) { + cout << "Error in select for symbol " << symbols[i] << " at position " << occ[symbols[i]] << endl; + cout << "value: " << s << endl; + cout << "Expected: " << i << endl; + error = true; + } + if(a!=symbols[i]) { + cout << "Error in access at position " << i << endl; + cout << "value: " << a << endl; + cout << "Expected: " << symbols[i] << endl; + error = true; + } + if(rM1!=occ[symbols[i]]-1) { + cout << "Error in rankM1 for symbol " << symbols[i] << " at position " << i-1 << endl; + cout << "value: " << rM1 << endl; + cout << "Expected: " << occ[symbols[i]]-1 << endl; + error = true; + } + } + if(!error) + cout << "Test OK! It works :)" << endl; + delete [] occ; +} + +void load(char *fname, uint ** text, uint * n) { + struct stat text_info; + if(stat(fname,&text_info)<0) { + cout << "could not stat: " << fname << endl; + return; + } + + *n= (uint)text_info.st_size/4; + *text = new uint[*n+1]; + FILE * fp = fopen(fname,"r"); + if(fp==NULL) { + cout << "could not open " << fname << endl; + return; + } + + cout << "File: " << fname << endl; + cout << "Length: " << *n << endl; + + uint max_symbol = 0; + for(uint i=0;i<*n;i++) { + uint c; + uint read=fread(&c,sizeof(uint),1,fp); + //assert(read==1); + (*text)[i] = 1+(uint)c; + c += read; + max_symbol = max(max_symbol,(*text)[i]); + } + max_symbol++; + fclose(fp); + + /*static_sequence * ss = ssb->build(*text,*n); + + char * fname2 = new char[10+string(fname).length()]; + sprintf(fname2,"%s.wt",fname); + fp = fopen(fname2,"w"); + ss->save(fp); + fclose(fp); + delete ss; + fp = fopen(fname2,"r"); + ss = static_sequence::load(fp); + fclose(fp); + delete [] fname2; + return ss;*/ +} + +static_sequence * savetest(char * bname, static_sequence * ss) { + char * fname = new char[10+string(bname).length()]; + sprintf(fname,"%s.ss",bname); + FILE * fp = fopen(fname,"w"); + cout << "Saving structure ... 
"; cout.flush(); + ss->save(fp); + cout << "done" << endl; cout.flush(); + fclose(fp); + cout << "Deleting structure ... "; cout.flush(); + delete ss; + cout << "done" << endl; cout.flush(); + fp = fopen(fname,"r"); + cout << "Loading structure ... "; cout.flush(); + ss = static_sequence::load(fp); + cout << "done" << endl; cout.flush(); + fclose(fp); + if(ss==NULL) cout << "Error loading static_sequence" << endl; + //cout << ss << endl; + delete [] fname; + return ss; +} + +void speed_rank(static_sequence * ss, uint * text, uint n) { + uint max_symbol = 0; + for(uint i=0;i0) + valid_symbols[c++]=i; + } + + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;irank(valid_symbols[symb],pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " ranks: " << t << " secs" << endl; + cout << " * Time per rank: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; + delete [] valid_symbols; + delete [] occ; +} + +void speed_select(static_sequence * ss, uint * text, uint n) { + uint max_symbol = 0; + for(uint i=0;i0) + valid_symbols[c++]=i; + } + + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;iselect(valid_symbols[symb],pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " selects: " << t << " secs" << endl; + cout << " * Time per select: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; + delete [] valid_symbols; + delete [] occ; +} + +void speed_access(static_sequence * ss, uint * text, uint n) { + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;iaccess(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " accesses: " << t << " secs" << endl; + cout << " * Time per access: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} diff --git a/tests/static_sequence_tester.h b/tests/static_sequence_tester.h new file mode 100644 index 0000000..a3583b7 --- /dev/null +++ 
b/tests/static_sequence_tester.h @@ -0,0 +1,42 @@ +/* static_sequence_tester.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_tester + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifndef STATIC_SEQUENCE_TESTER_H +#define STATIC_SEQUENCE_TESTER_H + +void test_static_sequence(uint * symbols, uint n, static_sequence * ss); +void load(char *fname, uint ** text, uint * n); +static_sequence * savetest(char * bname, static_sequence * ss); +void speed_rank(static_sequence * ss, uint * text, uint n); +void speed_select(static_sequence * ss, uint * text, uint n); +void speed_access(static_sequence * ss, uint * text, uint n); + +#endif diff --git a/tests/static_sequence_wvtree_noptrs_test.cpp b/tests/static_sequence_wvtree_noptrs_test.cpp new file mode 100644 index 0000000..ea788d0 --- /dev/null +++ b/tests/static_sequence_wvtree_noptrs_test.cpp @@ -0,0 +1,80 @@ +/* static_sequence_wvtree_noptrs_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * static_sequence_wvtree_noptrs_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=6) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + stringstream ss; + ss << argv[4]; + uint samp; + ss >> samp; + + uint * text; + uint n; + load(argv[1],&text,&n); + + alphabet_mapper * am; + + static_bitsequence_builder * bmb; + + if(string(argv[2])==string("b")) + bmb = new static_bitsequence_builder_brw32(samp); + else + bmb = new static_bitsequence_builder_rrr02(samp); + + if(string(argv[3])==string("p")) + am = new alphabet_mapper_none(); + else + am = new alphabet_mapper_cont(text,n,bmb); + + static_sequence_builder * ssb = new static_sequence_builder_wvtree_noptrs(bmb,am); + static_sequence * sseq = ssb->build(text,n); + + delete bmb; + delete ssb; + sseq = savetest(argv[1], sseq); + if(string(argv[5])==string("t")) + test_static_sequence(text,n,sseq); + else + cout << "Size: " << sseq->size() << endl; + cout << "*************************************" << endl; + speed_access(sseq,text,n); + cout << "*************************************" << endl; + speed_rank(sseq,text,n); + 
cout << "*************************************" << endl; + speed_select(sseq,text,n); + + delete [] text; + delete sseq; +} diff --git a/tests/static_sequence_wvtree_test.cpp b/tests/static_sequence_wvtree_test.cpp new file mode 100644 index 0000000..6c2820d --- /dev/null +++ b/tests/static_sequence_wvtree_test.cpp @@ -0,0 +1,82 @@ +/* static_sequence_wvtree_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_wvtree_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" +using namespace std; + +int main(int argc, char ** argv) { + if(argc!=6) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + stringstream ss; + ss << argv[4]; + uint samp; + ss >> samp; + + uint * text; + uint n; + load(argv[1],&text,&n); + + alphabet_mapper * am = new alphabet_mapper_none(); + + static_bitsequence_builder * bmb; + if(string(argv[2])==string("b")) + bmb = new static_bitsequence_builder_brw32(samp); + else + bmb = new static_bitsequence_builder_rrr02(samp); + + wt_coder * wc; + if(string(argv[3])==string("p")) + wc = new wt_coder_binary(text,n,am); + else + wc = new wt_coder_huff(text,n,am); + + 
static_sequence_builder * ssb = new static_sequence_builder_wvtree(wc,bmb,am); + static_sequence * sseq = ssb->build(text,n); + delete bmb; + delete ssb; + + sseq = savetest(argv[1], sseq); + if(string(argv[5])==string("t")) + test_static_sequence(text,n,sseq); + else + cout << "Size: " << sseq->size() << endl; + cout << "*************************************" << endl; + speed_access(sseq,text,n); + cout << "*************************************" << endl; + speed_rank(sseq,text,n); + cout << "*************************************" << endl; + speed_select(sseq,text,n); + + delete sseq; + delete [] text; +} + diff --git a/tests/test_brw32.cpp b/tests/test_brw32.cpp new file mode 100644 index 0000000..4020bb0 --- /dev/null +++ b/tests/test_brw32.cpp @@ -0,0 +1,84 @@ + +#include +#include +#include +#include + +using namespace std; + +int main(int argc, char ** argv) { + if(argc!=3) { + cout << "usage: " << argv[0] << " " << endl; + return 0; + } + FILE * fp = fopen(argv[1],"r"); + if(fp==NULL) { + cout << "Error opening " << argv[1] << endl; + return -1; + } + uint *bitseq, len; + uint l = fread(&len, sizeof(uint), 1, fp); + bitseq = new uint[uint_len(len,1)]; + l += fread(bitseq, sizeof(uint), uint_len(len,1), fp); + fclose(fp); + + uint sample_rate; + stringstream ss(argv[2]); + ss >> sample_rate; + + static_bitsequence * bs = new static_bitsequence_brw32(bitseq,len,sample_rate); + + char * fname = new char[string(argv[1]).length()+10]; + sprintf(fname,"%s.rrr",argv[1]); + + fp = fopen(fname,"w"); + cout << "Save: " << bs->save(fp) << endl; + fclose(fp); + delete bs; + + fp = fopen(fname,"r"); + bs = static_bitsequence::load(fp); + cout << bs << endl; + fclose(fp); + delete [] fname; + + cout << "Bitmap length: " << len << " =? 
" << bs->length() << endl; + cout << "Ones: " << bs->count_one() << endl; + cout << "Bitmap size: " << bs->size() << endl; + /*for(uint i=0;i<64;i++) { + if(i%15==0) cout << " "; + cout << (bs->access(i)?"1":"0"); + } + cout << endl;*/ + uint ones = 0; + for(uint i=0;iaccess(i) != (bitget(bitseq,i)!=0)) { + cout << "Access error for position " << i << endl; + cout << " got: " << bs->access(i) << " expected: " << (bitget(bitseq,i)!=0) << endl; + } + if(bs->rank1(i) != ones) { + cout << "Rank1 error for position " << i << endl; + cout << " got: " << bs->rank1(i) << " expected: " << ones << endl; + } + if(bitget(bitseq,i) && bs->select1(ones) != i) { + cout << "Select1 error for position " << i << " ones:" << ones << endl; + cout << " got: " << bs->select1(ones) << " expected: " << i << endl; + } + if(bs->rank0(i) != i+1-ones) { + cout << "Rank0 error for position " << i << endl; + cout << " got: " << bs->rank0(i) << " expected: " << ones << endl; + } + if(!bitget(bitseq,i) && bs->select0(i+1-ones) != i) { + cout << "Select0 error for position " << i << endl; + cout << " got: " << bs->select0(i+1-ones) << " expected: " << i << endl; + } + } + delete bs; + delete [] bitseq; + cout << "Test completed." << endl; + return 0; +} + diff --git a/tests/test_naive.cpp b/tests/test_naive.cpp new file mode 100644 index 0000000..3ce65ff --- /dev/null +++ b/tests/test_naive.cpp @@ -0,0 +1,49 @@ + +#include +#include +#include + +using namespace std; + +int main(int argc, char ** argv) { + if(argc!=2) { + cout << "usage: " << argv[0] << " " << endl; + return 0; + } + FILE * fp = fopen(argv[1],"r"); + if(fp==NULL) { + cout << "Error opening " << argv[1] << endl; + return -1; + } + uint *bitseq, len; + uint l = fread(&len, sizeof(uint), 1, fp); + bitseq = new uint[uint_len(len,1)]; + l += fread(bitseq, sizeof(uint), uint_len(len,1), fp); + static_bitsequence * bs = new static_bitsequence_naive(bitseq,len); + cout << "Bitmap length: " << len << " =? 
" << bs->length() << endl; + uint ones = 0; + for(uint i=0;irank1(i) != ones) { + cout << "Rank1 error for position " << i << endl; + cout << " got: " << bs->rank1(i) << " expected: " << ones << endl; + } + if(bitget(bitseq,i) && bs->select1(ones) != i) { + cout << "Select1 error for position " << i << endl; + cout << " got: " << bs->select1(ones) << " expected: " << i << endl; + } + if(bs->rank0(i) != i+1-ones) { + cout << "Rank0 error for position " << i << endl; + cout << " got: " << bs->rank0(i) << " expected: " << ones << endl; + } + if(!bitget(bitseq,i) && bs->select0(i+1-ones) != i) { + cout << "Select0 error for position " << i << endl; + cout << " got: " << bs->select0(ones) << " expected: " << i << endl; + } + } + delete bs; + fclose(fp); + cout << "Test completed." << endl; + return 0; +} + diff --git a/tests/test_rrr02.cpp b/tests/test_rrr02.cpp new file mode 100644 index 0000000..5c460f9 --- /dev/null +++ b/tests/test_rrr02.cpp @@ -0,0 +1,84 @@ + +#include +#include +#include +#include + +using namespace std; + +int main(int argc, char ** argv) { + if(argc!=3) { + cout << "usage: " << argv[0] << " " << endl; + return 0; + } + FILE * fp = fopen(argv[1],"r"); + if(fp==NULL) { + cout << "Error opening " << argv[1] << endl; + return -1; + } + uint *bitseq, len; + uint l=fread(&len, sizeof(uint), 1, fp); + bitseq = new uint[uint_len(len,1)]; + l+=fread(bitseq, sizeof(uint), uint_len(len,1), fp); + fclose(fp); + + static_bitsequence * bs = new static_bitsequence_rrr02(bitseq,len); + + char * fname = new char[string(argv[1]).length()+10]; + sprintf(fname,"%s.rrr",argv[1]); + + fp = fopen(fname,"w"); + cout << "Save: " << bs->save(fp) << endl; + fclose(fp); + delete bs; + + fp = fopen(fname,"r"); + bs = static_bitsequence::load(fp); + uint sample_rate; + stringstream ss(argv[2]); + ss >> sample_rate; + ((static_bitsequence_rrr02*)bs)->create_sampling(sample_rate); + fclose(fp); + delete [] fname; + + cout << "Bitmap length: " << len << " =? 
" << bs->length() << endl; + cout << "Ones: " << bs->count_one() << endl; + cout << "Bitmap size: " << bs->size() << endl; + /*for(uint i=0;i<64;i++) { + if(i%15==0) cout << " "; + cout << (bs->access(i)?"1":"0"); + } + cout << endl;*/ + uint ones = 0; + for(uint i=0;iaccess(i) != (bitget(bitseq,i)!=0)) { + cout << "Access error for position " << i << endl; + cout << " got: " << bs->access(i) << " expected: " << (bitget(bitseq,i)!=0) << endl; + } + if(bs->rank1(i) != ones) { + cout << "Rank1 error for position " << i << endl; + cout << " got: " << bs->rank1(i) << " expected: " << ones << endl; + } + if(bitget(bitseq,i) && bs->select1(ones) != i) { + cout << "Select1 error for position " << i << " ones:" << ones << endl; + cout << " got: " << bs->select1(ones) << " expected: " << i << endl; + } + if(bs->rank0(i) != i+1-ones) { + cout << "Rank0 error for position " << i << endl; + cout << " got: " << bs->rank0(i) << " expected: " << ones << endl; + } + if(!bitget(bitseq,i) && bs->select0(i+1-ones) != i) { + cout << "Select0 error for position " << i << endl; + cout << " got: " << bs->select0(i+1-ones) << " expected: " << i << endl; + } + } + delete bs; + //static_bitsequence_rrr02::delete_E(); + delete [] bitseq; + cout << "Test completed." 
<< endl; + return 0; +} + diff --git a/tests/test_wvtree01.cpp b/tests/test_wvtree01.cpp new file mode 100644 index 0000000..ffd09c3 --- /dev/null +++ b/tests/test_wvtree01.cpp @@ -0,0 +1,134 @@ + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +void test_static_sequence(uint * symbols, uint n, static_sequence * ss) { + cout << "Size: " << ss->size() << endl; + uint max_v=0; + for(uint i=0;iaccess(i); + uint r = ss->rank(symbols[i],i); + uint s = ss->select(symbols[i],occ[symbols[i]]); + uint rM1 = (i==0)?0:ss->rank(symbols[i],i-1); + if(r!=occ[symbols[i]]) { + cout << "Error in rank for symbol " << symbols[i] << " at position " << i << endl; + cout << "value: " << r << endl; + cout << "Expected: " << occ[symbols[i]] << endl; + error = true; + } + if(s!=i) { + cout << "Error in select for symbol " << symbols[i] << " at position " << occ[symbols[i]] << endl; + cout << "value: " << s << endl; + cout << "Expected: " << i << endl; + error = true; + } + if(a!=symbols[i]) { + cout << "Error in access at position " << i << endl; + cout << "value: " << a << endl; + cout << "Expected: " << symbols[i] << endl; + error = true; + } + if(rM1!=occ[symbols[i]]-1) { + cout << "Error in rankM1 for symbol " << symbols[i] << " at position " << i-1 << endl; + cout << "value: " << rM1 << endl; + cout << "Expected: " << occ[symbols[i]]-1 << endl; + error = true; + } + } + if(!error) + cout << "Test OK! 
It works :)" << endl; + delete [] occ; +} + +int main(int argc, char ** argv) { + if(argc!=3) { + cout << "usage: " << argv[0] << " " << endl; + return 0; + } + struct stat text_info; + if(stat(argv[1],&text_info)<0) { + cout << "could not stat: " << argv[1] << endl; + return -1; + } + + stringstream ss; + ss << argv[2]; + uint samp; + ss >> samp; + + uint n= (uint)text_info.st_size/4; + uint * text = new uint[n+1]; + FILE * fp = fopen(argv[1],"r"); + if(fp==NULL) { + cout << "could not open " << argv[1] << endl; + return -1; + } + + cout << "File: " << argv[1] << endl; + cout << "Length: " << n << endl; + + uint max_symbol = 0; + for(uint i=0;isave(fp); + fclose(fp); + delete wt; + fp = fopen(fname,"r"); + wt = static_sequence::load(fp); + fclose(fp); + delete [] fname; + + test_static_sequence(text,n,wt); + + cout << "WT Size: " << wt->size() << endl; + cout << "ft = " << 1.*wt->size()/(bits(max_symbol-1)*n/8) << endl; + + delete [] text; + delete wt; + +} diff --git a/tests/test_wvtree02.cpp b/tests/test_wvtree02.cpp new file mode 100644 index 0000000..9b49aca --- /dev/null +++ b/tests/test_wvtree02.cpp @@ -0,0 +1,144 @@ + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +void test_static_sequence(uint * symbols, uint n, static_sequence * ss) { + cout << "Size: " << ss->size() << endl; + uint max_v=0; + for(uint i=0;iaccess(i); + uint r = ss->rank(symbols[i],i); + uint s = ss->select(symbols[i],occ[symbols[i]]); + uint rM1 = (i==0)?0:ss->rank(symbols[i],i-1); + if(r!=occ[symbols[i]]) { + cout << "Error in rank for symbol " << symbols[i] << " at position " << i << endl; + cout << "value: " << r << endl; + cout << "Expected: " << occ[symbols[i]] << endl; + error = true; + } + if(s!=i) { + cout << "Error in select for symbol " << symbols[i] << " at position " << occ[symbols[i]] << endl; + cout << "value: " << s << endl; + cout << "Expected: " << i << endl; + error = true; + } + if(a!=symbols[i]) { + cout << "Error in 
access at position " << i << endl; + cout << "value: " << a << endl; + cout << "Expected: " << symbols[i] << endl; + error = true; + } + if(rM1!=occ[symbols[i]]-1) { + cout << "Error in rankM1 for symbol " << symbols[i] << " at position " << i-1 << endl; + cout << "value: " << rM1 << endl; + cout << "Expected: " << occ[symbols[i]]-1 << endl; + error = true; + } + } + if(!error) + cout << "Test OK! It works :)" << endl; + delete [] occ; +} + +int main(int argc, char ** argv) { + if(argc!=3) { + cout << "usage: " << argv[0] << " " << endl; + return 0; + } + struct stat text_info; + if(stat(argv[1],&text_info)<0) { + cout << "could not stat: " << argv[1] << endl; + return -1; + } + + uint samp; + { + stringstream ss; + ss << string(argv[2]); + + ss >> samp; + } + + uint n= (uint)text_info.st_size; + uint * text = new uint[n+1]; + FILE * fp = fopen(argv[1],"r"); + if(fp==NULL) { + cout << "could not open " << argv[1] << endl; + return -1; + } + + cout << "File: " << argv[1] << endl; + cout << "Length: " << n << endl; + + uint max_symbol = 0; + for(uint i=0;isave(fp) << endl; + fclose(fp); + delete wt; + fp = fopen(fname,"r"); + wt = static_sequence::load(fp); + fclose(fp); + delete [] fname; + + test_static_sequence(text,n,wt); + + cout << "WT Size: " << wt->size() << endl; + cout << "ft = " << 1.*wt->size()/n << endl; + + fname = new char[10+string(argv[1]).length()]; + sprintf(fname,"%s.wt",argv[1]); + fp = fopen(fname,"w"); + cout << "save: " << wt->save(fp) << endl; + fclose(fp); + delete [] fname; + + delete [] text; + delete wt; + +} diff --git a/tests/text_to_int.cpp b/tests/text_to_int.cpp new file mode 100644 index 0000000..74f676d --- /dev/null +++ b/tests/text_to_int.cpp @@ -0,0 +1,74 @@ +/* test_to_int.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. 
+ * + * text_to_int + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=3) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + char * fname = argv[1]; + char * oname = argv[2]; + + FILE * fp = fopen(fname,"r"); + if(fp==NULL) { + cout << "could not open " << fname << endl; + return 1; + } + struct stat text_info; + if(stat(fname,&text_info)<0) { + cout << "could not stat: " << fname << endl; + return 1; + } + + uint n= (uint)text_info.st_size; + uint * text = new uint[n]; + + for(uint i=0;i