diff --git a/src/Makefile.am b/src/Makefile.am index 5c2d7c8..19d9476 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -27,7 +27,7 @@ noinst_HEADERS = asfont.h button.h cpu.h dirdialog.h FastDelegate.h \ AM_CFLAGS= @CFLAGS@ @SDL_CFLAGS@ -AM_CXXFLAGS = @CXXFLAGS@ @SDL_CFLAGS@ -Isparsehash-1.6/src -DTARGET_GP2X \ +AM_CXXFLAGS = @CXXFLAGS@ @SDL_CFLAGS@ -DTARGET_GP2X \ -Wall -Wextra -Wundef -Wunused-macros gmenu2x_LDADD = @LIBS@ @SDL_LIBS@ -lSDL_image -lSDL_gfx -lSDL -ljpeg -lpng12 -lz -ldl -lpthread diff --git a/src/sparsehash-1.6/AUTHORS b/src/sparsehash-1.6/AUTHORS deleted file mode 100644 index ee92be8..0000000 --- a/src/sparsehash-1.6/AUTHORS +++ /dev/null @@ -1,2 +0,0 @@ -opensource@google.com - diff --git a/src/sparsehash-1.6/COPYING b/src/sparsehash-1.6/COPYING deleted file mode 100644 index e4956cf..0000000 --- a/src/sparsehash-1.6/COPYING +++ /dev/null @@ -1,28 +0,0 @@ -Copyright (c) 2005, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/sparsehash-1.6/ChangeLog b/src/sparsehash-1.6/ChangeLog deleted file mode 100644 index 789eb16..0000000 --- a/src/sparsehash-1.6/ChangeLog +++ /dev/null @@ -1,182 +0,0 @@ -Fri Jan 8 14:47:55 2010 Google Inc. - - * sparsehash: version 1.6 release - * New accessor methods for deleted_key, empty_key (sjackman) - * Use explicit hash functions in sparsehash tests (csilvers) - * BUGFIX: Cast resize to fix SUNWspro bug (csilvers) - * Check for sz overflow in min_size (csilvers) - * Speed up clear() for dense and sparse hashtables (jeff) - * Avoid shrinking in all cases when min-load is 0 (shaunj, csilvers) - * Improve densehashtable code for the deleted key (gpike) - * BUGFIX: Fix operator= when the 2 empty-keys differ (andreidam) - * BUGFIX: Fix ht copying when empty-key isn't set (andreidam) - * PORTING: Use TmpFile() instead of /tmp on MinGW (csilvers) - * PORTING: Use filenames that work with Stratus VOS. - -Tue May 12 14:16:38 2009 Google Inc. - - * sparsehash: version 1.5.2 release - * Fix compile error: not initializing set_key in all constructors - -Fri May 8 15:23:44 2009 Google Inc. - - * sparsehash: version 1.5.1 release - * Fix broken equal_range() for all the hash-classes (csilvers) - -Wed May 6 11:28:49 2009 Google Inc. 
- - * sparsehash: version 1.5 release - * Support the tr1 unordered_map (and unordered_set) API (csilvers) - * Store only key for delkey; reduces need for 0-arg c-tor (csilvers) - * Prefer unordered_map to hash_map for the timing test (csilvers) - * PORTING: update the resource use for 64-bit machines (csilvers) - * PORTING: fix MIN/MAX collisions by un-#including windows.h (csilvers) - * Updated autoconf version to 2.61 and libtool version to 1.5.26 - -Wed Jan 28 17:11:31 2009 Google Inc. - - * sparsehash: version 1.4 release - * Allow hashtables to be <32 buckets (csilvers) - * Fix initial-sizing bug: was sizing tables too small (csilvers) - * Add asserts that clients don't abuse deleted/empty key (csilvers) - * Improve determination of 32/64 bit for C code (csilvers) - * Small fix for doc files in rpm (csilvers) - -Thu Nov 6 15:06:09 2008 Google Inc. - - * sparsehash: version 1.3 release - * Add an interface to change the parameters for resizing (myl) - * Document another potentially good hash function (csilvers) - -Thu Sep 18 13:53:20 2008 Google Inc. - - * sparsehash: version 1.2 release - * Augment documentation to better describe namespace issues (csilvers) - * BUG FIX: replace hash<> with SPARSEHASH_HASH, for windows (csilvers) - * Add timing test to unittest to test repeated add+delete (csilvers) - * Do better picking a new size when resizing (csilvers) - * Use ::google instead of google as a namespace (csilvers) - * Improve threading test at config time (csilvers) - -Mon Feb 11 16:30:11 2008 Google Inc. - - * sparsehash: version 1.1 release - * Fix brown-paper-bag bug in some constructors (rafferty) - * Fix problem with variables shadowing member vars, add -Wshadow - -Thu Nov 29 11:44:38 2007 Google Inc. - - * sparsehash: version 1.0.2 release - * Fix a final reference to hash<> to use SPARSEHASH_HASH<> instead. - -Wed Nov 14 08:47:48 2007 Google Inc. 
- - * sparsehash: version 1.0.1 release :-( - * Remove an unnecessary (harmful) "#define hash" in windows' config.h - -Tue Nov 13 15:15:46 2007 Google Inc. - - * sparsehash: version 1.0 release! We are now out of beta. - * Clean up Makefile awk script to be more readable (csilvers) - * Namespace fixes: use fewer #defines, move typedefs into namespace - -Fri Oct 12 12:35:24 2007 Google Inc. - - * sparsehash: version 0.9.1 release - * Fix Makefile awk script to work on more architectures (csilvers) - * Add test to test code in more 'real life' situations (csilvers) - -Tue Oct 9 14:15:21 2007 Google Inc. - - * sparsehash: version 0.9 release - * More type-hygiene improvements, especially for 64-bit (csilvers) - * Some configure improvements to improve portability, utility (austern) - * Small bugfix for operator== for dense_hash_map (jeff) - -Tue Jul 3 12:55:04 2007 Google Inc. - - * sparsehash: version 0.8 release - * Minor type-hygiene improvements: size_t for int, etc. (csilvers) - * Porting improvements: tests pass on OS X, FreeBSD, Solaris (csilvers) - * Full windows port! VS solution provided for all unittests (csilvers) - -Mon Jun 11 11:33:41 2007 Google Inc. 
- - * sparsehash: version 0.7 release - * Syntax fixes to better support gcc 4.3 and VC++ 7 (mec, csilvers) - * Improved windows/VC++ support (see README.windows) (csilvers) - * Config improvements: better tcmalloc support and config.h (csilvers) - * More robust with missing hash_map + nix 'trampoline' .h's (csilvers) - * Support for STLport's hash_map/hash_fun locations (csilvers) - * Add .m4 files to distribution; now all source is there (csilvers) - * Tiny modification of shrink-threshhold to allow never-shrinking (amc) - * Protect timing tests against aggressive optimizers (csilvers) - * Extend time_hash_map to test bigger objects (csilvers) - * Extend type-trait support to work with const objects (csilvers) - * USER VISIBLE: speed up all code by replacing memmove with memcpy - (csilvers) - -Tue Mar 20 17:29:34 2007 Google Inc. - - * sparsehash: version 0.6 release - * Some improvement to type-traits (jyasskin) - * Better timing results when google-perftools is installed (sanjay) - * Updates and fixes to html documentation and README (csilvers) - * A bit more careful about #includes (csilvers) - * Fix for typo that broken compilation on some systems (csilvers) - * USER VISIBLE: New clear_no_resize() method added to dense_hash_map - (uszkoreit) - -Sat Oct 21 13:47:47 2006 Google Inc. - - * sparsehash: version 0.5 release - * Support uint16_t (SunOS) in addition to u_int16_t (BSD) (csilvers) - * Get rid of UNDERSTANDS_ITERATOR_TAGS; everyone understands (csilvers) - * Test that empty-key and deleted-key differ (rbayardo) - * Fix example docs: strcmp needs to test for NULL (csilvers) - -Sun Apr 23 22:42:35 2006 Google Inc. - - * sparsehash: version 0.4 release - * Remove POD requirement for keys and values! (austern) - * Add tr1-compatible type-traits system to speed up POD ops. (austern) - * Fixed const-iterator bug where postfix ++ didn't compile. (csilvers) - * Fixed iterator comparison bugs where <= was incorrect. 
(csilvers) - * Clean up config.h to keep its #defines from conflicting. (csilvers) - * Big documentation sweep and cleanup. (csilvers) - * Update documentation to talk more about good hash fns. (csilvers) - * Fixes to compile on MSVC (working around some MSVC bugs). (rennie) - * Avoid resizing hashtable on operator[] lookups (austern) - -Thu Nov 3 20:12:31 2005 Google Inc. - - * sparsehash: version 0.3 release - * Quiet compiler warnings on some compilers. (csilvers) - * Some documentation fixes: example code for dense_hash_map. (csilvers) - * Fix a bug where swap() wasn't swapping delete_key(). (csilvers) - * set_deleted_key() and set_empty_key() now take a key only, - allowing hash-map values to be forward-declared. (csilvers) - * support for std::insert_iterator (and std::inserter). (csilvers) - -Mon May 2 07:04:46 2005 Google Inc. - - * sparsehash: version 0.2 release - * Preliminary support for msvc++ compilation. (csilvers) - * Documentation fixes -- some example code was incomplete! (csilvers) - * Minimize size of config.h to avoid other-package conflicts (csilvers) - * Contribute a C-based version of sparsehash that served as the - inspiration for this code. One day, I hope to clean it up and - support it, but for now it's just in experimental/, for playing - around with. (csilvers) - * Change default namespace from std to google. (csilvers) - -Fri Jan 14 16:53:32 2005 Google Inc. - - * sparsehash: initial release: - The sparsehash package contains several hash-map implementations, - similar in API to SGI's hash_map class, but with different - performance characteristics. sparse_hash_map uses very little - space overhead: 1-2 bits per entry. dense_hash_map is typically - faster than the default SGI STL implementation. This package - also includes hash-set analogues of these classes. 
- diff --git a/src/sparsehash-1.6/INSTALL b/src/sparsehash-1.6/INSTALL deleted file mode 100644 index 23e5f25..0000000 --- a/src/sparsehash-1.6/INSTALL +++ /dev/null @@ -1,236 +0,0 @@ -Installation Instructions -************************* - -Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005 Free -Software Foundation, Inc. - -This file is free documentation; the Free Software Foundation gives -unlimited permission to copy, distribute and modify it. - -Basic Installation -================== - -These are generic installation instructions. - - The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, and a -file `config.log' containing compiler output (useful mainly for -debugging `configure'). - - It can also use an optional file (typically called `config.cache' -and enabled with `--cache-file=config.cache' or simply `-C') that saves -the results of its tests to speed up reconfiguring. (Caching is -disabled by default to prevent problems with accidental use of stale -cache files.) - - If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can -be considered for the next release. If you are using the cache, and at -some point `config.cache' contains results you don't want to keep, you -may remove or edit it. - - The file `configure.ac' (or `configure.in') is used to create -`configure' by a program called `autoconf'. You only need -`configure.ac' if you want to change it or regenerate `configure' using -a newer version of `autoconf'. 
- -The simplest way to compile this package is: - - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. If you're - using `csh' on an old version of System V, you might need to type - `sh ./configure' instead to prevent `csh' from trying to execute - `configure' itself. - - Running `configure' takes awhile. While running, it prints some - messages telling which features it is checking for. - - 2. Type `make' to compile the package. - - 3. Optionally, type `make check' to run any self-tests that come with - the package. - - 4. Type `make install' to install the programs and any data files and - documentation. - - 5. You can remove the program binaries and object files from the - source code directory by typing `make clean'. To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. There is - also a `make maintainer-clean' target, but that is intended mainly - for the package's developers. If you use it, you may have to get - all sorts of other programs in order to regenerate files that came - with the distribution. - -Compilers and Options -===================== - -Some systems require unusual options for compilation or linking that the -`configure' script does not know about. Run `./configure --help' for -details on some of the pertinent environment variables. - - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. Here -is an example: - - ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix - - *Note Defining Variables::, for more details. - -Compiling For Multiple Architectures -==================================== - -You can compile the package for more than one kind of computer at the -same time, by placing the object files for each architecture in their -own directory. 
To do this, you must use a version of `make' that -supports the `VPATH' variable, such as GNU `make'. `cd' to the -directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. - - If you have to use a `make' that does not support the `VPATH' -variable, you have to compile the package for one architecture at a -time in the source code directory. After you have installed the -package for one architecture, use `make distclean' before reconfiguring -for another architecture. - -Installation Names -================== - -By default, `make install' installs the package's commands under -`/usr/local/bin', include files under `/usr/local/include', etc. You -can specify an installation prefix other than `/usr/local' by giving -`configure' the option `--prefix=PREFIX'. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -pass the option `--exec-prefix=PREFIX' to `configure', the package uses -PREFIX as the prefix for installing programs and libraries. -Documentation and other data files still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like `--bindir=DIR' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. - -Optional Features -================= - -Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. 
-They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the -package recognizes. - - For packages that use the X Window System, `configure' can usually -find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. - -Specifying the System Type -========================== - -There may be some features `configure' cannot figure out automatically, -but needs to determine by the type of machine the package will run on. -Usually, assuming the package is built to be run on the _same_ -architectures, `configure' can figure that out, but if it prints a -message saying it cannot guess the machine type, give it the -`--build=TYPE' option. TYPE can either be a short name for the system -type, such as `sun4', or a canonical name which has the form: - - CPU-COMPANY-SYSTEM - -where SYSTEM can have one of these forms: - - OS KERNEL-OS - - See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't -need to know the machine type. - - If you are _building_ compiler tools for cross-compiling, you should -use the option `--target=TYPE' to select the type of system they will -produce code for. - - If you want to _use_ a cross compiler, that generates code for a -platform different from the build platform, you should specify the -"host" platform (i.e., that on which the generated programs will -eventually be run) with `--host=TYPE'. - -Sharing Defaults -================ - -If you want to set default values for `configure' scripts to share, you -can create a site shell script called `config.site' that gives default -values for variables like `CC', `cache_file', and `prefix'. 
-`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. - -Defining Variables -================== - -Variables not defined in a site shell script can be set in the -environment passed to `configure'. However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the `configure' command line, using `VAR=value'. For example: - - ./configure CC=/usr/local2/bin/gcc - -causes the specified `gcc' to be used as the C compiler (unless it is -overridden in the site shell script). Here is a another example: - - /bin/bash ./configure CONFIG_SHELL=/bin/bash - -Here the `CONFIG_SHELL=/bin/bash' operand causes subsequent -configuration-related scripts to be executed by `/bin/bash'. - -`configure' Invocation -====================== - -`configure' recognizes the following options to control how it operates. - -`--help' -`-h' - Print a summary of the options to `configure', and exit. - -`--version' -`-V' - Print the version of Autoconf used to generate the `configure' - script, and exit. - -`--cache-file=FILE' - Enable the cache: use and save the results of the tests in FILE, - traditionally `config.cache'. FILE defaults to `/dev/null' to - disable caching. - -`--config-cache' -`-C' - Alias for `--cache-file=config.cache'. - -`--quiet' -`--silent' -`-q' - Do not print messages saying which checks are being made. To - suppress all normal output, redirect it to `/dev/null' (any error - messages will still be shown). - -`--srcdir=DIR' - Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. - -`configure' also accepts some other, not widely useful, options. Run -`configure --help' for more details. 
- diff --git a/src/sparsehash-1.6/Makefile.am b/src/sparsehash-1.6/Makefile.am deleted file mode 100644 index 783baa6..0000000 --- a/src/sparsehash-1.6/Makefile.am +++ /dev/null @@ -1,157 +0,0 @@ -## Process this file with automake to produce Makefile.in - -# Make sure that when we re-make ./configure, we get the macros we need -ACLOCAL_AMFLAGS = -I m4 - -# This is so we can #include -AM_CPPFLAGS = -I$(top_srcdir)/src - -# These are good warnings to turn on by default -if GCC -AM_CXXFLAGS = -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare -Wshadow -endif - -googleincludedir = $(includedir)/google -## The .h files you want to install (that is, .h files that people -## who install this package can include in their own applications.) -googleinclude_HEADERS = \ - src/google/dense_hash_map \ - src/google/dense_hash_set \ - src/google/sparse_hash_map \ - src/google/sparse_hash_set \ - src/google/sparsetable \ - src/google/type_traits.h - -docdir = $(prefix)/share/doc/$(PACKAGE)-$(VERSION) -## This is for HTML and other documentation you want to install. -## Add your documentation files (in doc/) in addition to these boilerplate -## Also add a TODO file if you have one -dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README README.windows \ - TODO \ - doc/dense_hash_map.html \ - doc/dense_hash_set.html \ - doc/sparse_hash_map.html \ - doc/sparse_hash_set.html \ - doc/sparsetable.html \ - doc/implementation.html \ - doc/performance.html \ - doc/index.html \ - doc/designstyle.css - -## The libraries (.so's) you want to install -lib_LTLIBRARIES = -## The location of the windows project file for each binary we make -WINDOWS_PROJECTS = google-sparsehash.sln - -## unittests you want to run when people type 'make check'. -## TESTS is for binary unittests, check_SCRIPTS for script-based unittests. -## TESTS_ENVIRONMENT sets environment variables for when you run unittest, -## but it only seems to take effect for *binary* unittests (argh!) 
-TESTS = type_traits_unittest sparsetable_unittest hashtable_unittest \ - simple_test -# TODO(csilvers): get simple_test working on windows -WINDOWS_PROJECTS += vsprojects/type_traits_unittest/type_traits_unittest.vcproj \ - vsprojects/sparsetable_unittest/sparsetable_unittest.vcproj \ - vsprojects/hashtable_unittest/hashtable_unittest.vcproj -check_SCRIPTS = -TESTS_ENVIRONMENT = - -## This should always include $(TESTS), but may also include other -## binaries that you compile but don't want automatically installed. -noinst_PROGRAMS = $(TESTS) time_hash_map -WINDOWS_PROJECTS += vsprojects/time_hash_map/time_hash_map.vcproj - - -## vvvv RULES TO MAKE THE LIBRARIES, BINARIES, AND UNITTESTS - -# All our .h files need to read the config information in config.h. The -# autoheader config.h has too much info, including PACKAGENAME, that -# might conflict with other config.h's an application might #include. -# Thus, we create a "minimal" config.h, called sparseconfig.h, that -# includes only the #defines we really need, and that are unlikely to -# change from system to system. NOTE: The awk command is equivalent to -# fgrep -B2 -f- $(top_builddir)/src/config.h \ -# fgrep -vx -e -- > _sparsehash_config -# For correctness, it depends on the fact config.h.include does not have -# any lines starting with #. -src/google/sparsehash/sparseconfig.h: $(top_builddir)/src/config.h \ - $(top_srcdir)/src/config.h.include - [ -d $(@D) ] || mkdir -p $(@D) - echo "/*" > $(@D)/_sparsehash_config - echo " * NOTE: This file is for internal use only." >> $(@D)/_sparsehash_config - echo " * Do not use these #defines in your own program!" 
>> $(@D)/_sparsehash_config - echo " */" >> $(@D)/_sparsehash_config - $(AWK) '{prevline=currline; currline=$$0;} \ - /^#/ {in_second_file = 1;} \ - !in_second_file {if (currline !~ /^ *$$/) {inc[currline]=0}}; \ - in_second_file { for (i in inc) { \ - if (index(currline, i) != 0) { \ - print "\n"prevline"\n"currline; \ - delete inc[i]; \ - } \ - } }' \ - $(top_srcdir)/src/config.h.include $(top_builddir)/src/config.h \ - >> $(@D)/_sparsehash_config - mv -f $(@D)/_sparsehash_config $@ -# This is how we tell automake about auto-generated .h files -BUILT_SOURCES = src/google/sparsehash/sparseconfig.h -CLEANFILES = src/google/sparsehash/sparseconfig.h - -sparsehashincludedir = $(googleincludedir)/sparsehash -sparsehashinclude_HEADERS = \ - src/google/sparsehash/densehashtable.h \ - src/google/sparsehash/sparsehashtable.h -nodist_sparsehashinclude_HEADERS = src/google/sparsehash/sparseconfig.h - -type_traits_unittest_SOURCES = \ - src/type_traits_unittest.cc \ - $(sparsehashinclude_HEADERS) \ - src/google/type_traits.h -nodist_type_traits_unittest_SOURCES = $(nodist_sparsehashinclude_HEADERS) - -sparsetable_unittest_SOURCES = \ - src/sparsetable_unittest.cc \ - $(sparsehashinclude_HEADERS) \ - src/google/sparsetable -nodist_sparsetable_unittest_SOURCES = $(nodist_sparsehashinclude_HEADERS) - -hashtable_unittest_SOURCES = \ - src/hashtable_unittest.cc \ - $(googleinclude_HEADERS) \ - $(sparsehashinclude_HEADERS) \ - src/words -nodist_hashtable_unittest_SOURCES = $(nodist_sparsehashinclude_HEADERS) - -simple_test_SOURCES = \ - src/simple_test.cc \ - $(sparsehashinclude_HEADERS) -nodist_simple_test_SOURCES = $(nodist_sparsehashinclude_HEADERS) - -time_hash_map_SOURCES = \ - src/time_hash_map.cc \ - $(sparsehashinclude_HEADERS) \ - $(googleinclude_HEADERS) -nodist_time_hash_map_SOURCES = $(nodist_sparsehashinclude_HEADERS) - -# If tcmalloc is installed, use it with time_hash_map; it gives us -# heap-usage statistics for the hash_map routines, which is very nice 
-time_hash_map_CXXFLAGS = @tcmalloc_flags@ $(AM_CXXFLAGS) -time_hash_map_LDFLAGS = @tcmalloc_flags@ -time_hash_map_LDADD = @tcmalloc_libs@ - -## ^^^^ END OF RULES TO MAKE THE LIBRARIES, BINARIES, AND UNITTESTS - - -rpm: dist-gzip packages/rpm.sh packages/rpm/rpm.spec - @cd packages && ./rpm.sh ${PACKAGE} ${VERSION} - -deb: dist-gzip packages/deb.sh packages/deb/* - @cd packages && ./deb.sh ${PACKAGE} ${VERSION} - -# Windows wants write permission to .vcproj files and maybe even sln files. -dist-hook: - test -e "$(distdir)/vsprojects" \ - && chmod -R u+w $(distdir)/*.sln $(distdir)/vsprojects/ - -EXTRA_DIST = packages/rpm.sh packages/rpm/rpm.spec packages/deb.sh packages/deb \ - src/config.h.include src/windows $(WINDOWS_PROJECTS) experimental diff --git a/src/sparsehash-1.6/NEWS b/src/sparsehash-1.6/NEWS deleted file mode 100644 index e69de29..0000000 diff --git a/src/sparsehash-1.6/README b/src/sparsehash-1.6/README deleted file mode 100644 index 11df6ed..0000000 --- a/src/sparsehash-1.6/README +++ /dev/null @@ -1,149 +0,0 @@ -This directory contains several hash-map implementations, similar in -API to SGI's hash_map class, but with different performance -characteristics. sparse_hash_map uses very little space overhead, 1-2 -bits per entry. dense_hash_map is very fast, particulary on lookup. -(sparse_hash_set and dense_hash_set are the set versions of these -routines.) On the other hand, these classes have requirements that -may not make them appropriate for all applications. - -All these implementation use a hashtable with internal quadratic -probing. This method is space-efficient -- there is no pointer -overhead -- and time-efficient for good hash functions. - -COMPILING ---------- -To compile test applications with these classes, run ./configure -followed by make. To install these header files on your system, run -'make install'. (On Windows, the instructions are different; see -README.windows.) See INSTALL for more details. 
- -This code should work on any modern C++ system. It has been tested on -Linux (Ubuntu, Fedora, RedHat, Debian), Solaris 10 x86, FreeBSD 6.0, -OS X 10.3 and 10.4, and Windows under both VC++7 and VC++8. - -USING ------ -See the html files in the doc directory for small example programs -that use these classes. It's enough to just include the header file: - - #include // or sparse_hash_set, dense_hash_map, ... - google::sparse_hash_set number_mapper; - -and use the class the way you would other hash-map implementations. -(Though see "API" below for caveats.) - -By default (you can change it via a flag to ./configure), these hash -implementations are defined in the google namespace. - -API ---- -The API for sparse_hash_map, dense_hash_map, sparse_hash_set, and -dense_hash_set, are a superset of the API of SGI's hash_map class. -See doc/sparse_hash_map.html, et al., for more information about the -API. - -The usage of these classes differ from SGI's hash_map, and other -hashtable implementations, in the following major ways: - -1) dense_hash_map requires you to set aside one key value as the - 'empty bucket' value, set via the set_empty_key() method. This - *MUST* be called before you can use the dense_hash_map. It is - illegal to insert any elements into a dense_hash_map whose key is - equal to the empty-key. - -2) For both dense_hash_map and sparse_hash_map, if you wish to delete - elements from the hashtable, you must set aside a key value as the - 'deleted bucket' value, set via the set_deleted_key() method. If - your hash-map is insert-only, there is no need to call this - method. If you call set_deleted_key(), it is illegal to insert any - elements into a dense_hash_map or sparse_hash_map whose key is - equal to the deleted-key. - -3) These hash-map implementation support I/O. See below. - -There are also some smaller differences: - -1) The constructor takes an optional argument that specifies the - number of elements you expect to insert into the hashtable. 
This - differs from SGI's hash_map implementation, which takes an optional - number of buckets. - -2) erase() does not immediately reclaim memory. As a consequence, - erase() does not invalidate any iterators, making loops like this - correct: - for (it = ht.begin(); it != ht.end(); ++it) - if (...) ht.erase(it); - As another consequence, a series of erase() calls can leave your - hashtable using more memory than it needs to. The hashtable will - automatically compact() at the next call to insert(), but to - manually compact a hashtable, you can call - ht.resize(0) - -3) While sparse_hash_map et al. accept an Allocator template argument, - they ignore it. They use malloc() and free() for all memory - allocations. - -4) sparse_hash_map et al. do not use exceptions. - -I/O ---- -In addition to the normal hash-map operations, sparse_hash_map can -read and write hashtables to disk. (dense_hash_map also has the API, -but it has not yet been implemented, and writes will always fail.) - -In the simplest case, writing a hashtable is as easy as calling two -methods on the hashtable: - ht.write_metadata(fp); - ht.write_nopointer_data(fp); - -Reading in this data is equally simple: - google::sparse_hash_map<...> ht; - ht.read_metadata(fp); - ht.read_nopointer_data(fp); - -The above is sufficient if the key and value do not contain any -pointers: they are basic C types or agglomorations of basic C types. -If the key and/or value do contain pointers, you can still store the -hashtable by replacing write_nopointer_data() with a custom writing -routine. See sparse_hash_map.html et al. for more information. - -SPARSETABLE ------------ -In addition to the hash-map and hash-set classes, this package also -provides sparsetable.h, an array implementation that uses space -proportional to the number of elements in the array, rather than the -maximum element index. It uses very little space overhead: 1 bit per -entry. See doc/sparsetable.html for the API. 
- -RESOURCE USAGE --------------- -* sparse_hash_map has memory overhead of about 2 bits per hash-map - entry. -* dense_hash_map has a factor of 2-3 memory overhead: if your - hashtable data takes X bytes, dense_hash_map will use 3X-4X memory - total. - -Hashtables tend to double in size when resizing, creating an -additional 50% space overhead. dense_hash_map does in fact have a -significant "high water mark" memory use requirement. -sparse_hash_map, however, is written to need very little space -overhead when resizing: only a few bits per hashtable entry. - -PERFORMANCE ------------ -You can compile and run the included file time_hash_map.cc to examine -the performance of sparse_hash_map, dense_hash_map, and your native -hash_map implementation on your system. One test against the -SGI hash_map implementation gave the following timing information for -a simple find() call: - SGI hash_map: 22 ns - dense_hash_map: 13 ns - sparse_hash_map: 117 ns - SGI map: 113 ns - -See doc/performance.html for more detailed charts on resource usage -and performance data. - ---- -16 March 2005 -(Last updated: 20 March 2007) diff --git a/src/sparsehash-1.6/TODO b/src/sparsehash-1.6/TODO deleted file mode 100644 index e9b0263..0000000 --- a/src/sparsehash-1.6/TODO +++ /dev/null @@ -1,28 +0,0 @@ -1) TODO: I/O implementation in densehashtable.h - -2) TODO: document SPARSEHASH_STAT_UPDATE macro, and also macros that - tweak performance. Perhaps add support to these to the API? - -3) TODO: support exceptions? - -4) BUG: sparsetable's operator[] doesn't work well with printf: you - need to explicitly cast the result to value_type to print it. (It - works fine with streams.) - -5) TODO: consider rewriting dense_hash_map to use a 'groups' scheme, - like sparsetable, but without the sparse-allocation within a - group. This makes resizing have better memory-use properties. The - downside is that probes across groups might take longer since - groups are not contiguous in memory. 
Making groups the same size - as a cache-line, and ensuring they're loaded on cache-line - boundaries, might help. Needs careful testing to make sure it - doesn't hurt performance. - -6) TODO: Get the C-only version of sparsehash in experimental/ ready - for prime-time. - -7) TODO: use cmake (www.cmake.org) to make it easy to isntall this on - a windows system. - ---- -28 February 2007 diff --git a/src/sparsehash-1.6/aclocal.m4 b/src/sparsehash-1.6/aclocal.m4 deleted file mode 100644 index 2bdc7db..0000000 --- a/src/sparsehash-1.6/aclocal.m4 +++ /dev/null @@ -1,868 +0,0 @@ -# generated automatically by aclocal 1.9.6 -*- Autoconf -*- - -# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, -# 2005 Free Software Foundation, Inc. -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -# Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# AM_AUTOMAKE_VERSION(VERSION) -# ---------------------------- -# Automake X.Y traces this macro to ensure aclocal.m4 has been -# generated from the m4 files accompanying Automake X.Y. -AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version="1.9"]) - -# AM_SET_CURRENT_AUTOMAKE_VERSION -# ------------------------------- -# Call AM_AUTOMAKE_VERSION so it can be traced. -# This function is AC_REQUIREd by AC_INIT_AUTOMAKE. 
-AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], - [AM_AUTOMAKE_VERSION([1.9.6])]) - -# AM_AUX_DIR_EXPAND -*- Autoconf -*- - -# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets -# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to -# `$srcdir', `$srcdir/..', or `$srcdir/../..'. -# -# Of course, Automake must honor this variable whenever it calls a -# tool from the auxiliary directory. The problem is that $srcdir (and -# therefore $ac_aux_dir as well) can be either absolute or relative, -# depending on how configure is run. This is pretty annoying, since -# it makes $ac_aux_dir quite unusable in subdirectories: in the top -# source directory, any form will work fine, but in subdirectories a -# relative path needs to be adjusted first. -# -# $ac_aux_dir/missing -# fails when called from a subdirectory if $ac_aux_dir is relative -# $top_srcdir/$ac_aux_dir/missing -# fails if $ac_aux_dir is absolute, -# fails when called from a subdirectory in a VPATH build with -# a relative $ac_aux_dir -# -# The reason of the latter failure is that $top_srcdir and $ac_aux_dir -# are both prefixed by $srcdir. In an in-source build this is usually -# harmless because $srcdir is `.', but things will broke when you -# start a VPATH build or use an absolute $srcdir. -# -# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, -# iff we strip the leading $srcdir from $ac_aux_dir. That would be: -# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` -# and then we would define $MISSING as -# MISSING="\${SHELL} $am_aux_dir/missing" -# This will work as long as MISSING is not called from configure, because -# unfortunately $(top_srcdir) has no meaning in configure. 
-# However there are other variables, like CC, which are often used in -# configure, and could therefore not use this "fixed" $ac_aux_dir. -# -# Another solution, used here, is to always expand $ac_aux_dir to an -# absolute PATH. The drawback is that using absolute paths prevent a -# configured tree to be moved without reconfiguration. - -AC_DEFUN([AM_AUX_DIR_EXPAND], -[dnl Rely on autoconf to set up CDPATH properly. -AC_PREREQ([2.50])dnl -# expand $ac_aux_dir to an absolute path -am_aux_dir=`cd $ac_aux_dir && pwd` -]) - -# AM_CONDITIONAL -*- Autoconf -*- - -# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 7 - -# AM_CONDITIONAL(NAME, SHELL-CONDITION) -# ------------------------------------- -# Define a conditional. -AC_DEFUN([AM_CONDITIONAL], -[AC_PREREQ(2.52)dnl - ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], - [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl -AC_SUBST([$1_TRUE]) -AC_SUBST([$1_FALSE]) -if $2; then - $1_TRUE= - $1_FALSE='#' -else - $1_TRUE='#' - $1_FALSE= -fi -AC_CONFIG_COMMANDS_PRE( -[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then - AC_MSG_ERROR([[conditional "$1" was never defined. -Usually this means the macro was only invoked conditionally.]]) -fi])]) - - -# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. 
- -# serial 8 - -# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be -# written in clear, in which case automake, when reading aclocal.m4, -# will think it sees a *use*, and therefore will trigger all it's -# C support machinery. Also note that it means that autoscan, seeing -# CC etc. in the Makefile, will ask for an AC_PROG_CC use... - - -# _AM_DEPENDENCIES(NAME) -# ---------------------- -# See how the compiler implements dependency checking. -# NAME is "CC", "CXX", "GCJ", or "OBJC". -# We try a few techniques and use that to set a single cache variable. -# -# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was -# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular -# dependency, and given that the user is not expected to run this macro, -# just rely on AC_PROG_CC. -AC_DEFUN([_AM_DEPENDENCIES], -[AC_REQUIRE([AM_SET_DEPDIR])dnl -AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl -AC_REQUIRE([AM_MAKE_INCLUDE])dnl -AC_REQUIRE([AM_DEP_TRACK])dnl - -ifelse([$1], CC, [depcc="$CC" am_compiler_list=], - [$1], CXX, [depcc="$CXX" am_compiler_list=], - [$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'], - [$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'], - [depcc="$$1" am_compiler_list=]) - -AC_CACHE_CHECK([dependency style of $depcc], - [am_cv_$1_dependencies_compiler_type], -[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then - # We make a subdir and do the tests there. Otherwise we can end up - # making bogus files that we don't know about and never remove. For - # instance it was reported that on HP-UX the gcc test will end up - # making a dummy file named `D' -- because `-MD' means `put the output - # in D'. - mkdir conftest.dir - # Copy depcomp to subdir because otherwise we won't find it if we're - # using a relative directory. - cp "$am_depcomp" conftest.dir - cd conftest.dir - # We will build objects and dependencies in a subdirectory because - # it helps to detect inapplicable dependency modes. 
For instance - # both Tru64's cc and ICC support -MD to output dependencies as a - # side effect of compilation, but ICC will put the dependencies in - # the current directory while Tru64 will put them in the object - # directory. - mkdir sub - - am_cv_$1_dependencies_compiler_type=none - if test "$am_compiler_list" = ""; then - am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` - fi - for depmode in $am_compiler_list; do - # Setup a source with many dependencies, because some compilers - # like to wrap large dependency lists on column 80 (with \), and - # we should not choose a depcomp mode which is confused by this. - # - # We need to recreate these files for each test, as the compiler may - # overwrite some of them when testing with obscure command lines. - # This happens at least with the AIX C compiler. - : > sub/conftest.c - for i in 1 2 3 4 5 6; do - echo '#include "conftst'$i'.h"' >> sub/conftest.c - # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with - # Solaris 8's {/usr,}/bin/sh. - touch sub/conftst$i.h - done - echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf - - case $depmode in - nosideeffect) - # after this tag, mechanisms are not by side-effect, so they'll - # only be used when explicitly requested - if test "x$enable_dependency_tracking" = xyes; then - continue - else - break - fi - ;; - none) break ;; - esac - # We check with `-c' and `-o' for the sake of the "dashmstdout" - # mode. It turns out that the SunPro C++ compiler does not properly - # handle `-M -o', and we need to detect this. 
- if depmode=$depmode \ - source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ - depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ - $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ - >/dev/null 2>conftest.err && - grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && - ${MAKE-make} -s -f confmf > /dev/null 2>&1; then - # icc doesn't choke on unknown options, it will just issue warnings - # or remarks (even with -Werror). So we grep stderr for any message - # that says an option was ignored or not supported. - # When given -MP, icc 7.0 and 7.1 complain thusly: - # icc: Command line warning: ignoring option '-M'; no argument required - # The diagnosis changed in icc 8.0: - # icc: Command line remark: option '-MP' not supported - if (grep 'ignoring option' conftest.err || - grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else - am_cv_$1_dependencies_compiler_type=$depmode - break - fi - fi - done - - cd .. - rm -rf conftest.dir -else - am_cv_$1_dependencies_compiler_type=none -fi -]) -AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) -AM_CONDITIONAL([am__fastdep$1], [ - test "x$enable_dependency_tracking" != xno \ - && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) -]) - - -# AM_SET_DEPDIR -# ------------- -# Choose a directory name for dependency files. 
-# This macro is AC_REQUIREd in _AM_DEPENDENCIES -AC_DEFUN([AM_SET_DEPDIR], -[AC_REQUIRE([AM_SET_LEADING_DOT])dnl -AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl -]) - - -# AM_DEP_TRACK -# ------------ -AC_DEFUN([AM_DEP_TRACK], -[AC_ARG_ENABLE(dependency-tracking, -[ --disable-dependency-tracking speeds up one-time build - --enable-dependency-tracking do not reject slow dependency extractors]) -if test "x$enable_dependency_tracking" != xno; then - am_depcomp="$ac_aux_dir/depcomp" - AMDEPBACKSLASH='\' -fi -AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) -AC_SUBST([AMDEPBACKSLASH]) -]) - -# Generate code to set up dependency tracking. -*- Autoconf -*- - -# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -#serial 3 - -# _AM_OUTPUT_DEPENDENCY_COMMANDS -# ------------------------------ -AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], -[for mf in $CONFIG_FILES; do - # Strip MF so we end up with the name of the file. - mf=`echo "$mf" | sed -e 's/:.*$//'` - # Check whether this is an Automake generated Makefile or not. - # We used to match only the files named `Makefile.in', but - # some people rename them; so instead we look at the file content. - # Grep'ing the first line is not enough: some people post-process - # each Makefile.in and add a new line on top of each file to say so. - # So let's grep whole file. - if grep '^#.*generated by automake' $mf > /dev/null 2>&1; then - dirpart=`AS_DIRNAME("$mf")` - else - continue - fi - # Extract the definition of DEPDIR, am__include, and am__quote - # from the Makefile without running `make'. 
- DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` - test -z "$DEPDIR" && continue - am__include=`sed -n 's/^am__include = //p' < "$mf"` - test -z "am__include" && continue - am__quote=`sed -n 's/^am__quote = //p' < "$mf"` - # When using ansi2knr, U may be empty or an underscore; expand it - U=`sed -n 's/^U = //p' < "$mf"` - # Find all dependency output files, they are included files with - # $(DEPDIR) in their names. We invoke sed twice because it is the - # simplest approach to changing $(DEPDIR) to its actual value in the - # expansion. - for file in `sed -n " - s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ - sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do - # Make sure the directory exists. - test -f "$dirpart/$file" && continue - fdir=`AS_DIRNAME(["$file"])` - AS_MKDIR_P([$dirpart/$fdir]) - # echo "creating $dirpart/$file" - echo '# dummy' > "$dirpart/$file" - done -done -])# _AM_OUTPUT_DEPENDENCY_COMMANDS - - -# AM_OUTPUT_DEPENDENCY_COMMANDS -# ----------------------------- -# This macro should only be invoked once -- use via AC_REQUIRE. -# -# This code is only required when automatic dependency tracking -# is enabled. FIXME. This creates each `.P' file that we will -# need in order to bootstrap the dependency handling code. -AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], -[AC_CONFIG_COMMANDS([depfiles], - [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], - [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) -]) - -# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 8 - -# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS. -AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)]) - -# Do all the work for Automake. 
-*- Autoconf -*- - -# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 12 - -# This macro actually does too much. Some checks are only needed if -# your package does certain things. But this isn't really a big deal. - -# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) -# AM_INIT_AUTOMAKE([OPTIONS]) -# ----------------------------------------------- -# The call with PACKAGE and VERSION arguments is the old style -# call (pre autoconf-2.50), which is being phased out. PACKAGE -# and VERSION should now be passed to AC_INIT and removed from -# the call to AM_INIT_AUTOMAKE. -# We support both call styles for the transition. After -# the next Automake release, Autoconf can make the AC_INIT -# arguments mandatory, and then we can depend on a new Autoconf -# release and drop the old call support. -AC_DEFUN([AM_INIT_AUTOMAKE], -[AC_PREREQ([2.58])dnl -dnl Autoconf wants to disallow AM_ names. We explicitly allow -dnl the ones we care about. -m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl -AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl -AC_REQUIRE([AC_PROG_INSTALL])dnl -# test to see if srcdir already configured -if test "`cd $srcdir && pwd`" != "`pwd`" && - test -f $srcdir/config.status; then - AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) -fi - -# test whether we have cygpath -if test -z "$CYGPATH_W"; then - if (cygpath --version) >/dev/null 2>/dev/null; then - CYGPATH_W='cygpath -w' - else - CYGPATH_W=echo - fi -fi -AC_SUBST([CYGPATH_W]) - -# Define the identity of the package. -dnl Distinguish between old-style and new-style calls. 
-m4_ifval([$2], -[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl - AC_SUBST([PACKAGE], [$1])dnl - AC_SUBST([VERSION], [$2])], -[_AM_SET_OPTIONS([$1])dnl - AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl - AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl - -_AM_IF_OPTION([no-define],, -[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package]) - AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl - -# Some tools Automake needs. -AC_REQUIRE([AM_SANITY_CHECK])dnl -AC_REQUIRE([AC_ARG_PROGRAM])dnl -AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version}) -AM_MISSING_PROG(AUTOCONF, autoconf) -AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version}) -AM_MISSING_PROG(AUTOHEADER, autoheader) -AM_MISSING_PROG(MAKEINFO, makeinfo) -AM_PROG_INSTALL_SH -AM_PROG_INSTALL_STRIP -AC_REQUIRE([AM_PROG_MKDIR_P])dnl -# We need awk for the "check" target. The system "awk" is bad on -# some platforms. -AC_REQUIRE([AC_PROG_AWK])dnl -AC_REQUIRE([AC_PROG_MAKE_SET])dnl -AC_REQUIRE([AM_SET_LEADING_DOT])dnl -_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], - [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], - [_AM_PROG_TAR([v7])])]) -_AM_IF_OPTION([no-dependencies],, -[AC_PROVIDE_IFELSE([AC_PROG_CC], - [_AM_DEPENDENCIES(CC)], - [define([AC_PROG_CC], - defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [_AM_DEPENDENCIES(CXX)], - [define([AC_PROG_CXX], - defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl -]) -]) - - -# When config.status generates a header, we must update the stamp-h file. -# This file resides in the same directory as the config header -# that is generated. The stamp files are numbered to have different names. - -# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the -# loop where config.status creates the headers, so we can generate -# our stamp files there. -AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], -[# Compute $1's index in $config_headers. 
-_am_stamp_count=1 -for _am_header in $config_headers :; do - case $_am_header in - $1 | $1:* ) - break ;; - * ) - _am_stamp_count=`expr $_am_stamp_count + 1` ;; - esac -done -echo "timestamp for $1" >`AS_DIRNAME([$1])`/stamp-h[]$_am_stamp_count]) - -# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# AM_PROG_INSTALL_SH -# ------------------ -# Define $install_sh. -AC_DEFUN([AM_PROG_INSTALL_SH], -[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl -install_sh=${install_sh-"$am_aux_dir/install-sh"} -AC_SUBST(install_sh)]) - -# Copyright (C) 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 2 - -# Check whether the underlying file-system supports filenames -# with a leading dot. For instance MS-DOS doesn't. -AC_DEFUN([AM_SET_LEADING_DOT], -[rm -rf .tst 2>/dev/null -mkdir .tst 2>/dev/null -if test -d .tst; then - am__leading_dot=. -else - am__leading_dot=_ -fi -rmdir .tst 2>/dev/null -AC_SUBST([am__leading_dot])]) - -# Check to see how 'make' treats includes. -*- Autoconf -*- - -# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 3 - -# AM_MAKE_INCLUDE() -# ----------------- -# Check to see how make treats includes. -AC_DEFUN([AM_MAKE_INCLUDE], -[am_make=${MAKE-make} -cat > confinc << 'END' -am__doit: - @echo done -.PHONY: am__doit -END -# If we don't find an include directive, just comment out the code. 
-AC_MSG_CHECKING([for style of include used by $am_make]) -am__include="#" -am__quote= -_am_result=none -# First try GNU make style include. -echo "include confinc" > confmf -# We grep out `Entering directory' and `Leaving directory' -# messages which can occur if `w' ends up in MAKEFLAGS. -# In particular we don't look at `^make:' because GNU make might -# be invoked under some other name (usually "gmake"), in which -# case it prints its new name instead of `make'. -if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then - am__include=include - am__quote= - _am_result=GNU -fi -# Now try BSD make style include. -if test "$am__include" = "#"; then - echo '.include "confinc"' > confmf - if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then - am__include=.include - am__quote="\"" - _am_result=BSD - fi -fi -AC_SUBST([am__include]) -AC_SUBST([am__quote]) -AC_MSG_RESULT([$_am_result]) -rm -f confinc confmf -]) - -# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- - -# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 4 - -# AM_MISSING_PROG(NAME, PROGRAM) -# ------------------------------ -AC_DEFUN([AM_MISSING_PROG], -[AC_REQUIRE([AM_MISSING_HAS_RUN]) -$1=${$1-"${am_missing_run}$2"} -AC_SUBST($1)]) - - -# AM_MISSING_HAS_RUN -# ------------------ -# Define MISSING if not defined so far and test if it supports --run. -# If it does, set am_missing_run to use it, otherwise, to nothing. 
-AC_DEFUN([AM_MISSING_HAS_RUN], -[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl -test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing" -# Use eval to expand $SHELL -if eval "$MISSING --run true"; then - am_missing_run="$MISSING --run " -else - am_missing_run= - AC_MSG_WARN([`missing' script is too old or missing]) -fi -]) - -# Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# AM_PROG_MKDIR_P -# --------------- -# Check whether `mkdir -p' is supported, fallback to mkinstalldirs otherwise. -# -# Automake 1.8 used `mkdir -m 0755 -p --' to ensure that directories -# created by `make install' are always world readable, even if the -# installer happens to have an overly restrictive umask (e.g. 077). -# This was a mistake. There are at least two reasons why we must not -# use `-m 0755': -# - it causes special bits like SGID to be ignored, -# - it may be too restrictive (some setups expect 775 directories). -# -# Do not use -m 0755 and let people choose whatever they expect by -# setting umask. -# -# We cannot accept any implementation of `mkdir' that recognizes `-p'. -# Some implementations (such as Solaris 8's) are not thread-safe: if a -# parallel make tries to run `mkdir -p a/b' and `mkdir -p a/c' -# concurrently, both version can detect that a/ is missing, but only -# one can create it and the other will error out. Consequently we -# restrict ourselves to GNU make (using the --version option ensures -# this.) -AC_DEFUN([AM_PROG_MKDIR_P], -[if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then - # We used to keeping the `.' as first argument, in order to - # allow $(mkdir_p) to be used without argument. As in - # $(mkdir_p) $(somedir) - # where $(somedir) is conditionally defined. However this is wrong - # for two reasons: - # 1. 
if the package is installed by a user who cannot write `.' - # make install will fail, - # 2. the above comment should most certainly read - # $(mkdir_p) $(DESTDIR)$(somedir) - # so it does not work when $(somedir) is undefined and - # $(DESTDIR) is not. - # To support the latter case, we have to write - # test -z "$(somedir)" || $(mkdir_p) $(DESTDIR)$(somedir), - # so the `.' trick is pointless. - mkdir_p='mkdir -p --' -else - # On NextStep and OpenStep, the `mkdir' command does not - # recognize any option. It will interpret all options as - # directories to create, and then abort because `.' already - # exists. - for d in ./-p ./--version; - do - test -d $d && rmdir $d - done - # $(mkinstalldirs) is defined by Automake if mkinstalldirs exists. - if test -f "$ac_aux_dir/mkinstalldirs"; then - mkdir_p='$(mkinstalldirs)' - else - mkdir_p='$(install_sh) -d' - fi -fi -AC_SUBST([mkdir_p])]) - -# Helper functions for option handling. -*- Autoconf -*- - -# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 3 - -# _AM_MANGLE_OPTION(NAME) -# ----------------------- -AC_DEFUN([_AM_MANGLE_OPTION], -[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) - -# _AM_SET_OPTION(NAME) -# ------------------------------ -# Set option NAME. Presently that only means defining a flag for this option. -AC_DEFUN([_AM_SET_OPTION], -[m4_define(_AM_MANGLE_OPTION([$1]), 1)]) - -# _AM_SET_OPTIONS(OPTIONS) -# ---------------------------------- -# OPTIONS is a space-separated list of Automake options. -AC_DEFUN([_AM_SET_OPTIONS], -[AC_FOREACH([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) - -# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) -# ------------------------------------------- -# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. 
-AC_DEFUN([_AM_IF_OPTION], -[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) - -# Check to make sure that the build environment is sane. -*- Autoconf -*- - -# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 4 - -# AM_SANITY_CHECK -# --------------- -AC_DEFUN([AM_SANITY_CHECK], -[AC_MSG_CHECKING([whether build environment is sane]) -# Just in case -sleep 1 -echo timestamp > conftest.file -# Do `set' in a subshell so we don't clobber the current shell's -# arguments. Must try -L first in case configure is actually a -# symlink; some systems play weird games with the mod time of symlinks -# (eg FreeBSD returns the mod time of the symlink's containing -# directory). -if ( - set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null` - if test "$[*]" = "X"; then - # -L didn't work. - set X `ls -t $srcdir/configure conftest.file` - fi - rm -f conftest.file - if test "$[*]" != "X $srcdir/configure conftest.file" \ - && test "$[*]" != "X conftest.file $srcdir/configure"; then - - # If neither matched, then we have a broken ls. This can happen - # if, for instance, CONFIG_SHELL is bash and it inherits a - # broken ls alias from the environment. This has actually - # happened. Such a system could not be considered "sane". - AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken -alias in your environment]) - fi - - test "$[2]" = conftest.file - ) -then - # Ok. - : -else - AC_MSG_ERROR([newly created file is older than distributed files! -Check your system clock]) -fi -AC_MSG_RESULT(yes)]) - -# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. 
-# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# AM_PROG_INSTALL_STRIP -# --------------------- -# One issue with vendor `install' (even GNU) is that you can't -# specify the program used to strip binaries. This is especially -# annoying in cross-compiling environments, where the build's strip -# is unlikely to handle the host's binaries. -# Fortunately install-sh will honor a STRIPPROG variable, so we -# always use install-sh in `make install-strip', and initialize -# STRIPPROG with the value of the STRIP variable (set by the user). -AC_DEFUN([AM_PROG_INSTALL_STRIP], -[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl -# Installed binaries are usually stripped using `strip' when the user -# run `make install-strip'. However `strip' might not be the right -# tool to use in cross-compilation environments, therefore Automake -# will honor the `STRIP' environment variable to overrule this program. -dnl Don't test for $cross_compiling = yes, because it might be `maybe'. -if test "$cross_compiling" != no; then - AC_CHECK_TOOL([STRIP], [strip], :) -fi -INSTALL_STRIP_PROGRAM="\${SHELL} \$(install_sh) -c -s" -AC_SUBST([INSTALL_STRIP_PROGRAM])]) - -# Check how to create a tarball. -*- Autoconf -*- - -# Copyright (C) 2004, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 2 - -# _AM_PROG_TAR(FORMAT) -# -------------------- -# Check how to create a tarball in format FORMAT. -# FORMAT should be one of `v7', `ustar', or `pax'. -# -# Substitute a variable $(am__tar) that is a command -# writing to stdout a FORMAT-tarball containing the directory -# $tardir. 
-# tardir=directory && $(am__tar) > result.tar -# -# Substitute a variable $(am__untar) that extract such -# a tarball read from stdin. -# $(am__untar) < result.tar -AC_DEFUN([_AM_PROG_TAR], -[# Always define AMTAR for backward compatibility. -AM_MISSING_PROG([AMTAR], [tar]) -m4_if([$1], [v7], - [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'], - [m4_case([$1], [ustar],, [pax],, - [m4_fatal([Unknown tar format])]) -AC_MSG_CHECKING([how to create a $1 tar archive]) -# Loop over all known methods to create a tar archive until one works. -_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' -_am_tools=${am_cv_prog_tar_$1-$_am_tools} -# Do not fold the above two line into one, because Tru64 sh and -# Solaris sh will not grok spaces in the rhs of `-'. -for _am_tool in $_am_tools -do - case $_am_tool in - gnutar) - for _am_tar in tar gnutar gtar; - do - AM_RUN_LOG([$_am_tar --version]) && break - done - am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' - am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' - am__untar="$_am_tar -xf -" - ;; - plaintar) - # Must skip GNU tar: if it does not support --format= it doesn't create - # ustar tarball either. - (tar --version) >/dev/null 2>&1 && continue - am__tar='tar chf - "$$tardir"' - am__tar_='tar chf - "$tardir"' - am__untar='tar xf -' - ;; - pax) - am__tar='pax -L -x $1 -w "$$tardir"' - am__tar_='pax -L -x $1 -w "$tardir"' - am__untar='pax -r' - ;; - cpio) - am__tar='find "$$tardir" -print | cpio -o -H $1 -L' - am__tar_='find "$tardir" -print | cpio -o -H $1 -L' - am__untar='cpio -i -H $1 -d' - ;; - none) - am__tar=false - am__tar_=false - am__untar=false - ;; - esac - - # If the value was cached, stop now. We just wanted to have am__tar - # and am__untar set. 
- test -n "${am_cv_prog_tar_$1}" && break - - # tar/untar a dummy directory, and stop if the command works - rm -rf conftest.dir - mkdir conftest.dir - echo GrepMe > conftest.dir/file - AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) - rm -rf conftest.dir - if test -s conftest.tar; then - AM_RUN_LOG([$am__untar /dev/null 2>&1 && break - fi -done -rm -rf conftest.dir - -AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) -AC_MSG_RESULT([$am_cv_prog_tar_$1])]) -AC_SUBST([am__tar]) -AC_SUBST([am__untar]) -]) # _AM_PROG_TAR - -m4_include([m4/acx_pthread.m4]) -m4_include([m4/google_namespace.m4]) -m4_include([m4/namespaces.m4]) -m4_include([m4/stl_hash.m4]) -m4_include([m4/stl_hash_fun.m4]) -m4_include([m4/stl_namespace.m4]) diff --git a/src/sparsehash-1.6/configure.ac b/src/sparsehash-1.6/configure.ac deleted file mode 100644 index dea367c..0000000 --- a/src/sparsehash-1.6/configure.ac +++ /dev/null @@ -1,74 +0,0 @@ -## Process this file with autoconf to produce configure. -## In general, the safest way to proceed is to run ./autogen.sh - -# make sure we're interpreted by some minimal autoconf -AC_PREREQ(2.57) - -AC_INIT(sparsehash, 1.6, opensource@google.com) -# The argument here is just something that should be in the current directory -# (for sanity checking) -AC_CONFIG_SRCDIR(README) -AM_INIT_AUTOMAKE([dist-zip]) -AM_CONFIG_HEADER(src/config.h) - -# Checks for programs. -AC_PROG_CC -AC_PROG_CPP -AC_PROG_CXX -AM_CONDITIONAL(GCC, test "$GCC" = yes) # let the Makefile know if we're gcc - -# Check whether some low-level functions/files are available -AC_HEADER_STDC -AC_CHECK_FUNCS(memcpy memmove) -AC_CHECK_TYPES([uint16_t]) # defined in C99 systems -AC_CHECK_TYPES([u_int16_t]) # defined in BSD-derived systems, and gnu -AC_CHECK_TYPES([__uint16]) # defined in some windows systems (vc7) -AC_CHECK_TYPES([long long]) # probably defined everywhere, but... 
- -# These are 'only' needed for unittests -AC_CHECK_HEADERS(sys/resource.h unistd.h sys/time.h sys/utsname.h) - -# If you have google-perftools installed, we can do a bit more testing. -# We not only want to set HAVE_MALLOC_EXTENSION_H, we also want to set -# a variable to let the Makefile to know to link in tcmalloc. -AC_LANG([C++]) -AC_CHECK_HEADERS(google/malloc_extension.h, - tcmalloc_libs=-ltcmalloc, - tcmalloc_libs=) -# On some systems, when linking in tcmalloc you also need to link in -# pthread. That's a bug somewhere, but we'll work around it for now. -tcmalloc_flags="" -if test -n "$tcmalloc_libs"; then - ACX_PTHREAD - tcmalloc_flags="\$(PTHREAD_CFLAGS)" - tcmalloc_libs="$tcmalloc_libs \$(PTHREAD_LIBS)" -fi -AC_SUBST(tcmalloc_flags) -AC_SUBST(tcmalloc_libs) - -# Figure out where hash_map lives and also hash_fun.h (or stl_hash_fun.h). -# This also tells us what namespace hash code lives in. -AC_CXX_STL_HASH -AC_CXX_STL_HASH_FUN - -# Find out what namespace 'normal' STL code lives in, and also what namespace -# the user wants our classes to be defined in -AC_CXX_STL_NAMESPACE -AC_DEFINE_GOOGLE_NAMESPACE(google) - -# In unix-based systems, hash is always defined as hash<> (in namespace. -# HASH_NAMESPACE.) So we can use a simple AC_DEFINE here. On -# windows, and possibly on future unix STL implementations, this -# macro will evaluate to something different.) -AC_DEFINE(SPARSEHASH_HASH_NO_NAMESPACE, hash, - [The system-provided hash function, in namespace HASH_NAMESPACE.]) - -# Do *not* define this in terms of SPARSEHASH_HASH_NO_NAMESPACE, because -# SPARSEHASH_HASH is exported to sparseconfig.h, but S_H_NO_NAMESPACE isn't. 
-AC_DEFINE(SPARSEHASH_HASH, HASH_NAMESPACE::hash, - [The system-provided hash function including the namespace.]) - - -# Write generated configuration file -AC_CONFIG_FILES([Makefile]) -AC_OUTPUT diff --git a/src/sparsehash-1.6/doc/dense_hash_map.html b/src/sparsehash-1.6/doc/dense_hash_map.html deleted file mode 100644 index eaef87e..0000000 --- a/src/sparsehash-1.6/doc/dense_hash_map.html +++ /dev/null @@ -1,1591 +0,0 @@ - - - - - -dense_hash_map<Key, Data, HashFcn, EqualKey, Alloc> - - - - -

[Note: this document is formatted similarly to the SGI STL -implementation documentation pages, and refers to concepts and classes -defined there. However, neither this document nor the code it -describes is associated with SGI, nor is it necessary to have SGI's -STL implementation installed in order to use this class.]

- - -

dense_hash_map<Key, Data, HashFcn, EqualKey, Alloc>

- -

dense_hash_map is a Hashed -Associative Container that associates objects of type Key -with objects of type Data. dense_hash_map is a Pair -Associative Container, meaning that its value type is pair<const Key, Data>. It is also a -Unique -Associative Container, meaning that no two elements have keys that -compare equal using EqualKey.

- -

Looking up an element in a dense_hash_map by its key is -efficient, so dense_hash_map is useful for "dictionaries" -where the order of elements is irrelevant. If it is important for the -elements to be in a particular order, however, then map is more appropriate.

- -

dense_hash_map is distinguished from other hash-map -implementations by its speed and by the ability to save -and restore contents to disk. On the other hand, this hash-map -implementation can use significantly more space than other hash-map -implementations, and it also has requirements -- for instance, for a -distinguished "empty key" -- that may not be easy for all -applications to satisfy.

- -

This class is appropriate for applications that need speedy access -to relatively small "dictionaries" stored in memory, or for -applications that need these dictionaries to be persistent. [implementation note])

- - -

Example

- -(Note: this example uses SGI semantics for hash<> --- the kind used by gcc and most Unix compiler suites -- and not -Dinkumware semantics -- the kind used by Microsoft Visual Studio. If -you are using MSVC, this example will not compile as-is: you'll need -to change hash to hash_compare, and you -won't use eqstr at all. See the MSVC documentation for -hash_map and hash_compare, for more -details.) - -
-#include <iostream>
-#include <google/dense_hash_map>
-
-using google::dense_hash_map;      // namespace where class lives by default
-using std::cout;
-using std::endl;
-using ext::hash;  // or __gnu_cxx::hash, or maybe tr1::hash, depending on your OS
-
-struct eqstr
-{
-  bool operator()(const char* s1, const char* s2) const
-  {
-    return (s1 == s2) || (s1 && s2 && strcmp(s1, s2) == 0);
-  }
-};
-
-int main()
-{
-  dense_hash_map<const char*, int, hash<const char*>, eqstr> months;
-  
-  months.set_empty_key(NULL);
-  months["january"] = 31;
-  months["february"] = 28;
-  months["march"] = 31;
-  months["april"] = 30;
-  months["may"] = 31;
-  months["june"] = 30;
-  months["july"] = 31;
-  months["august"] = 31;
-  months["september"] = 30;
-  months["october"] = 31;
-  months["november"] = 30;
-  months["december"] = 31;
-  
-  cout << "september -> " << months["september"] << endl;
-  cout << "april     -> " << months["april"] << endl;
-  cout << "june      -> " << months["june"] << endl;
-  cout << "november  -> " << months["november"] << endl;
-}
-
- - -

Definition

- -Defined in the header dense_hash_map. -This class is not part of the C++ standard, though it is mostly -compatible with the tr1 class unordered_map. - - -

Template parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParameterDescriptionDefault
- Key - - The hash_map's key type. This is also defined as - dense_hash_map::key_type. - -   -
- Data - - The hash_map's data type. This is also defined as - dense_hash_map::data_type. [7] - -   -
- HashFcn - - The hash function used by the - hash_map. This is also defined as dense_hash_map::hasher. -
Note: Hashtable performance depends heavliy on the choice of - hash function. See the performance - page for more information. -
- hash<Key> -
- EqualKey - - The hash_map key equality function: a binary predicate that determines - whether two keys are equal. This is also defined as - dense_hash_map::key_equal. - - equal_to<Key> -
- Alloc - - Ignored; this is included only for API-compatibility - with SGI's (and tr1's) STL implementation. - -
- - -

Model of

- -Unique Hashed Associative Container, -Pair Associative Container - - -

Type requirements

- -
    -
  • -Key is Assignable. -
  • -EqualKey is a Binary Predicate whose argument type is Key. -
  • -EqualKey is an equivalence relation. -
  • -Alloc is an Allocator. -
- - -

Public base classes

- -None. - - -

Members

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MemberWhere definedDescription
- key_type - - Associative - Container - - The dense_hash_map's key type, Key. -
- data_type - - Pair - Associative Container - - The type of object associated with the keys. -
- value_type - - Pair - Associative Container - - The type of object, pair<const key_type, data_type>, - stored in the hash_map. -
- hasher - - Hashed - Associative Container - - The dense_hash_map's hash - function. -
- key_equal - - Hashed - Associative Container - - Function - object that compares keys for equality. -
- allocator_type - - Unordered Associative Container (tr1) - - The type of the Allocator given as a template parameter. -
- pointer - - Container - - Pointer to T. -
- reference - - Container - - Reference to T -
- const_reference - - Container - - Const reference to T -
- size_type - - Container - - An unsigned integral type. -
- difference_type - - Container - - A signed integral type. -
- iterator - - Container - - Iterator used to iterate through a dense_hash_map. [1] -
- const_iterator - - Container - - Const iterator used to iterate through a dense_hash_map. -
- local_iterator - - Unordered Associative Container (tr1) - - Iterator used to iterate through a subset of - dense_hash_map. [1] -
- const_local_iterator - - Unordered Associative Container (tr1) - - Const iterator used to iterate through a subset of - dense_hash_map. -
- iterator begin() - - Container - - Returns an iterator pointing to the beginning of the - dense_hash_map. -
- iterator end() - - Container - - Returns an iterator pointing to the end of the - dense_hash_map. -
- const_iterator begin() const - - Container - - Returns an const_iterator pointing to the beginning of the - dense_hash_map. -
- const_iterator end() const - - Container - - Returns an const_iterator pointing to the end of the - dense_hash_map. -
- local_iterator begin(size_type i) - - Unordered Associative Container (tr1) - - Returns a local_iterator pointing to the beginning of bucket - i in the dense_hash_map. -
- local_iterator end(size_type i) - - Unordered Associative Container (tr1) - - Returns a local_iterator pointing to the end of bucket - i in the dense_hash_map. For - dense_hash_map, each bucket contains either 0 or 1 item. -
- const_local_iterator begin(size_type i) const - - Unordered Associative Container (tr1) - - Returns a const_local_iterator pointing to the beginning of bucket - i in the dense_hash_map. -
- const_local_iterator end(size_type i) const - - Unordered Associative Container (tr1) - - Returns a const_local_iterator pointing to the end of bucket - i in the dense_hash_map. For - dense_hash_map, each bucket contains either 0 or 1 item. -
- size_type size() const - - Container - - Returns the size of the dense_hash_map. -
- size_type max_size() const - - Container - - Returns the largest possible size of the dense_hash_map. -
- bool empty() const - - Container - - true if the dense_hash_map's size is 0. -
- size_type bucket_count() const - - Hashed - Associative Container - - Returns the number of buckets used by the dense_hash_map. -
- size_type max_bucket_count() const - - Hashed - Associative Container - - Returns the largest possible number of buckets used by the dense_hash_map. -
- size_type bucket_size(size_type i) const - - Unordered Associative Container (tr1) - - Returns the number of elements in bucket i. For - dense_hash_map, this will be either 0 or 1. -
- size_type bucket(const key_type& key) const - - Unordered Associative Container (tr1) - - If the key exists in the map, returns the index of the bucket - containing the given key, otherwise, return the bucket the key - would be inserted into. - This value may be passed to begin(size_type) and - end(size_type). -
- float load_factor() const - - Unordered Associative Container (tr1) - - The number of elements in the dense_hash_map divided by - the number of buckets. -
- float max_load_factor() const - - Unordered Associative Container (tr1) - - The maximum load factor before increasing the number of buckets in - the dense_hash_map. -
- void max_load_factor(float new_grow) - - Unordered Associative Container (tr1) - - Sets the maximum load factor before increasing the number of - buckets in the dense_hash_map. -
- float min_load_factor() const - - dense_hash_map - - The minimum load factor before decreasing the number of buckets in - the dense_hash_map. -
- void min_load_factor(float new_grow) - - dense_hash_map - - Sets the minimum load factor before decreasing the number of - buckets in the dense_hash_map. -
- void set_resizing_parameters(float shrink, float grow) - - dense_hash_map - - DEPRECATED. See below. -
- void resize(size_type n) - - Hashed - Associative Container - - Increases the bucket count to hold at least n items. - [4] [5] -
- void rehash(size_type n) - - Unordered Associative Container (tr1) - - Increases the bucket count to hold at least n items. - This is identical to resize. - [4] [5] -
- hasher hash_funct() const - - Hashed - Associative Container - - Returns the hasher object used by the dense_hash_map. -
- hasher hash_function() const - - Unordered Associative Container (tr1) - - Returns the hasher object used by the dense_hash_map. - This is idential to hash_funct. -
- key_equal key_eq() const - - Hashed - Associative Container - - Returns the key_equal object used by the - dense_hash_map. -
- dense_hash_map() - - Container - - Creates an empty dense_hash_map. -
- dense_hash_map(size_type n) - - Hashed - Associative Container - - Creates an empty dense_hash_map that's optimized for holding - up to n items. - [5] -
- dense_hash_map(size_type n, const hasher& h) - - Hashed - Associative Container - - Creates an empty dense_hash_map that's optimized for up - to n items, using h as the hash function. -
- dense_hash_map(size_type n, const hasher& h, const - key_equal& k) - - Hashed - Associative Container - - Creates an empty dense_hash_map that's optimized for up - to n items, using h as the hash function and - k as the key equal function. -
-
template <class InputIterator>
-dense_hash_map(InputIterator f, InputIterator l) 
-[2] -
- Unique - Hashed Associative Container - - Creates a dense_hash_map with a copy of a range. -
-
template <class InputIterator>
-dense_hash_map(InputIterator f, InputIterator l, size_type n) 
-[2] -
- Unique - Hashed Associative Container - - Creates a hash_map with a copy of a range that's optimized to - hold up to n items. -
-
template <class InputIterator>
-dense_hash_map(InputIterator f, InputIterator l, size_type n, const
-hasher& h) 
[2] -
- Unique - Hashed Associative Container - - Creates a hash_map with a copy of a range that's optimized to hold - up to n items, using h as the hash function. -
-
template <class InputIterator>
-dense_hash_map(InputIterator f, InputIterator l, size_type n, const
-hasher& h, const key_equal& k) 
[2] -
- Unique - Hashed Associative Container - - Creates a hash_map with a copy of a range that's optimized for - holding up to n items, using h as the hash - function and k as the key equal function. -
- dense_hash_map(const hash_map&) - - Container - - The copy constructor. -
- dense_hash_map& operator=(const hash_map&) - - Container - - The assignment operator -
- void swap(hash_map&) - - Container - - Swaps the contents of two hash_maps. -
-
pair<iterator, bool> insert(const value_type& x)
-
-
- Unique - Associative Container - - Inserts x into the dense_hash_map. -
-
template <class InputIterator>
-void insert(InputIterator f, InputIterator l) 
[2] -
- Unique - Associative Container - - Inserts a range into the dense_hash_map. -
- void set_empty_key(const key_type& key) [6] - - dense_hash_map - - See below. -
- void set_deleted_key(const key_type& key) [6] - - dense_hash_map - - See below. -
- void clear_deleted_key() [6] - - dense_hash_map - - See below. -
- void erase(iterator pos) - - Associative - Container - - Erases the element pointed to by pos. - [6] -
- size_type erase(const key_type& k) - - Associative - Container - - Erases the element whose key is k. - [6] -
- void erase(iterator first, iterator last) - - Associative - Container - - Erases all elements in a range. - [6] -
- void clear() - - Associative - Container - - Erases all of the elements. -
- void clear_no_resize() - - dense_hash_map - - See below. -
- const_iterator find(const key_type& k) const - - Associative - Container - - Finds an element whose key is k. -
- iterator find(const key_type& k) - - Associative - Container - - Finds an element whose key is k. -
- size_type count(const key_type& k) const - - Unique - Associative Container - - Counts the number of elements whose key is k. -
-
pair<const_iterator, const_iterator> equal_range(const
-key_type& k) const 
-
- Associative - Container - - Finds a range containing all elements whose key is k. -
-
pair<iterator, iterator> equal_range(const
-key_type& k) 
-
- Associative - Container - - Finds a range containing all elements whose key is k. -
-
data_type& operator[](const key_type& k) [3] 
-
- dense_hash_map - - See below. -
- bool write_metadata(FILE *fp) - - dense_hash_map - - See below. -
- bool read_metadata(FILE *fp) - - dense_hash_map - - See below. -
- bool write_nopointer_data(FILE *fp) - - dense_hash_map - - See below. -
- bool read_nopointer_data(FILE *fp) - - dense_hash_map - - See below. -
-
bool operator==(const hash_map&, const hash_map&)
-
-
- Hashed - Associative Container - - Tests two hash_maps for equality. This is a global function, not a - member function. -
- - -

New members

- -These members are not defined in the Unique -Hashed Associative Container, Pair -Associative Container, or tr1's +Unordered Associative -Container requirements, but are specific to -dense_hash_map. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MemberDescription
- void set_empty_key(const key_type& key) - - Sets the distinguished "empty" key to key. This must be - called immediately after construct time, before calls to another - other dense_hash_map operation. [6] -
- void set_deleted_key(const key_type& key) - - Sets the distinguished "deleted" key to key. This must be - called before any calls to erase(). [6] -
- void clear_deleted_key() - - Clears the distinguished "deleted" key. After this is called, - calls to erase() are not valid on this object. - [6] -
- void clear_no_resize() - - Clears the hashtable like clear() does, but does not - recover the memory used for hashtable buckets. (The memory - used by the items in the hashtable is still recovered.) - This can save time for applications that want to reuse a - dense_hash_map many times, each time with a similar number - of objects. -
-
-data_type& 
-operator[](const key_type& k) [3]
-
-
- Returns a reference to the object that is associated with - a particular key. If the dense_hash_map does not already - contain such an object, operator[] inserts the default - object data_type(). [3] -
- void set_resizing_parameters(float shrink, float grow) - - This function is DEPRECATED. It is equivalent to calling - min_load_factor(shrink); max_load_factor(grow). -
- bool write_metadata(FILE *fp) - - Write hashtable metadata to fp. See below. -
- bool read_metadata(FILE *fp) - - Read hashtable metadata from fp. See below. -
- bool write_nopointer_data(FILE *fp) - - Write hashtable contents to fp. This is valid only if the - hashtable key and value are "plain" data. See below. -
- bool read_nopointer_data(FILE *fp) - - Read hashtable contents to fp. This is valid only if the - hashtable key and value are "plain" data. See below. -
- - -

Notes

- -

[1] - -dense_hash_map::iterator is not a mutable iterator, because -dense_hash_map::value_type is not Assignable. -That is, if i is of type dense_hash_map::iterator -and p is of type dense_hash_map::value_type, then -*i = p is not a valid expression. However, -dense_hash_map::iterator isn't a constant iterator either, -because it can be used to modify the object that it points to. Using -the same notation as above, (*i).second = p is a valid -expression.

- -

[2] - -This member function relies on member template functions, which -may not be supported by all compilers. If your compiler supports -member templates, you can call this function with any type of input -iterator. If your compiler does not yet support member templates, -though, then the arguments must either be of type const -value_type* or of type dense_hash_map::const_iterator.

- -

[3] - -Since operator[] might insert a new element into the -dense_hash_map, it can't possibly be a const member -function. Note that the definition of operator[] is -extremely simple: m[k] is equivalent to -(*((m.insert(value_type(k, data_type()))).first)).second. -Strictly speaking, this member function is unnecessary: it exists only -for convenience.

- -

[4] - -In order to preserve iterators, erasing hashtable elements does not -cause a hashtable to resize. This means that after a string of -erase() calls, the hashtable will use more space than is -required. At a cost of invalidating all current iterators, you can -call resize() to manually compact the hashtable. The -hashtable promotes too-small resize() arguments to the -smallest legal value, so to compact a hashtable, it's sufficient to -call resize(0).

- -

[5] - -Unlike some other hashtable implementations, the optional n in -the calls to the constructor, resize, and rehash -indicates not the desired number of buckets that -should be allocated, but instead the expected number of items to be -inserted. The class then sizes the hash-map appropriately for the -number of items specified. It's not an error to actually insert more -or fewer items into the hashtable, but the implementation is most -efficient -- does the fewest hashtable resizes -- if the number of -inserted items is n or slightly less.

- -

[6] - -dense_hash_map requires you call -set_empty_key() immediately after constructing the hash-map, -and before calling any other dense_hash_map method. (This is -the largest difference between the dense_hash_map API and -other hash-map APIs. See implementation.html -for why this is necessary.) -The argument to set_empty_key() should be a key-value that -is never used for legitimate hash-map entries. If you have no such -key value, you will be unable to use dense_hash_map. It is -an error to call insert() with an item whose key is the -"empty key."

- -dense_hash_map also requires you call -set_deleted_key() before calling erase(). -The argument to set_deleted_key() should be a key-value that -is never used for legitimate hash-map entries. It must be different -from the key-value used for set_empty_key(). It is an error to call -erase() without first calling set_deleted_key(), and -it is also an error to call insert() with an item whose key -is the "deleted key."

- -

There is no need to call set_deleted_key if you do not -wish to call erase() on the hash-map.

- -

It is acceptable to change the deleted-key at any time by calling -set_deleted_key() with a new argument. You can also call -clear_deleted_key(), at which point all keys become valid for -insertion but no hashtable entries can be deleted until -set_deleted_key() is called again.

- -

[7] - -dense_hash_map requires that data_type has a -zero-argument default constructor. This is because -dense_hash_map uses the special value pair(empty_key, -data_type()) to denote empty buckets, and thus needs to be able -to create data_type using a zero-argument constructor.

- -

If your data_type does not have a zero-argument default -constructor, there are several workarounds:

-
    -
  • Store a pointer to data_type in the map, instead of - data_type directly. This may yield faster code as - well, since hashtable-resizes will just have to move pointers - around, rather than copying the entire data_type. -
  • Add a zero-argument default constructor to data_type. -
  • Subclass data_type and add a zero-argument default - constructor to the subclass. -
- - -

Input/Output

- -
-

IMPORTANT IMPLEMENTATION NOTE: In the current version of -this code, the input/output routines for dense_hash_map have -not yet been implemented. This section explains the API, but -note that all calls to these routines will fail (return -false). It is a TODO to remedy this situation.

-
- -

It is possible to save and restore dense_hash_map objects -to disk. Storage takes place in two steps. The first writes the -hashtable metadata. The second writes the actual data.

- -

To write a hashtable to disk, first call write_metadata() -on an open file pointer. This saves the hashtable information in a -byte-order-independent format.

- -

After the metadata has been written to disk, you must write the -actual data stored in the hash-map to disk. If both the key and data -are "simple" enough, you can do this by calling -write_nopointer_data(). "Simple" data is data that can be -safely copied to disk via fwrite(). Native C data types fall -into this category, as do structs of native C data types. Pointers -and STL objects do not.

- -

Note that write_nopointer_data() does not do any endian -conversion. Thus, it is only appropriate when you intend to read the -data on the same endian architecture as you write the data.

- -

If you cannot use write_nopointer_data() for any reason, -you can write the data yourself by iterating over the -dense_hash_map with a const_iterator and writing -the key and data in any manner you wish.

- -

To read the hashtable information from disk, first you must create -a dense_hash_map object. Then open a file pointer to point -to the saved hashtable, and call read_metadata(). If you -saved the data via write_nopointer_data(), you can follow the -read_metadata() call with a call to -read_nopointer_data(). This is all that is needed.

- -

If you saved the data through a custom write routine, you must call -a custom read routine to read in the data. To do this, iterate over -the dense_hash_map with an iterator; this operation -is sensical because the metadata has already been set up. For each -iterator item, you can read the key and value from disk, and set it -appropriately. You will need to do a const_cast on the -iterator, since it->first is always const. You -will also need to use placement-new if the key or value is a C++ -object. The code might look like this:

-
-   for (dense_hash_map<int*, ComplicatedClass>::iterator it = ht.begin();
-        it != ht.end(); ++it) {
-       // The key is stored in the dense_hash_map as a pointer
-       const_cast<int*>(it->first) = new int;
-       fread(const_cast<int*>(it->first), sizeof(int), 1, fp);
-       // The value is a complicated C++ class that takes an int to construct
-       int ctor_arg;
-       fread(&ctor_arg, sizeof(int), 1, fp);
-       new (&it->second) ComplicatedClass(ctor_arg);  // "placement new"
-   }
-
- - -

Validity of Iterators

- -

erase() is guaranteed not to invalidate any iterators -- -except for any iterators pointing to the item being erased, of course. -insert() invalidates all iterators, as does -resize().

- -

This is implemented by making erase() not resize the -hashtable. If you desire maximum space efficiency, you can call -resize(0) after a string of erase() calls, to force -the hashtable to resize to the smallest possible size.

- -

In addition to invalidating iterators, insert() -and resize() invalidate all pointers into the hashtable. If -you want to store a pointer to an object held in a dense_hash_map, -either do so after finishing hashtable inserts, or store the object on -the heap and a pointer to it in the dense_hash_map.

- - -

See also

- -

The following are SGI STL, and some Google STL, concepts and -classes related to dense_hash_map.

- -hash_map, -Associative Container, -Hashed Associative Container, -Pair Associative Container, -Unique Hashed Associative Container, -set, -map -multiset, -multimap, -hash_set, -hash_multiset, -hash_multimap, -sparse_hash_map, -sparse_hash_set, -dense_hash_set - - - diff --git a/src/sparsehash-1.6/doc/dense_hash_set.html b/src/sparsehash-1.6/doc/dense_hash_set.html deleted file mode 100644 index 2a5ff2e..0000000 --- a/src/sparsehash-1.6/doc/dense_hash_set.html +++ /dev/null @@ -1,1445 +0,0 @@ - - - - - -dense_hash_set<Key, HashFcn, EqualKey, Alloc> - - - - -

[Note: this document is formatted similarly to the SGI STL -implementation documentation pages, and refers to concepts and classes -defined there. However, neither this document nor the code it -describes is associated with SGI, nor is it necessary to have SGI's -STL implementation installed in order to use this class.]

- - -

dense_hash_set<Key, HashFcn, EqualKey, Alloc>

- -

dense_hash_set is a Hashed -Associative Container that stores objects of type Key. -dense_hash_set is a Simple -Associative Container, meaning that its value type, as well as its -key type, is key. It is also a -Unique -Associative Container, meaning that no two elements have keys that -compare equal using EqualKey.

- -

Looking up an element in a dense_hash_set by its key is -efficient, so dense_hash_set is useful for "dictionaries" -where the order of elements is irrelevant. If it is important for the -elements to be in a particular order, however, then map is more appropriate.

- -

dense_hash_set is distinguished from other hash-set -implementations by its speed and by the ability to save -and restore contents to disk. On the other hand, this hash-set -implementation can use significantly more space than other hash-set -implementations, and it also has requirements -- for instance, for a -distinguished "empty key" -- that may not be easy for all -applications to satisfy.

- -

This class is appropriate for applications that need speedy access -to relatively small "dictionaries" stored in memory, or for -applications that need these dictionaries to be persistent. [implementation note])

- - -

Example

- -(Note: this example uses SGI semantics for hash<> --- the kind used by gcc and most Unix compiler suites -- and not -Dinkumware semantics -- the kind used by Microsoft Visual Studio. If -you are using MSVC, this example will not compile as-is: you'll need -to change hash to hash_compare, and you -won't use eqstr at all. See the MSVC documentation for -hash_map and hash_compare, for more -details.) - -
-#include <iostream>
-#include <google/dense_hash_set>
-
-using google::dense_hash_set;      // namespace where class lives by default
-using std::cout;
-using std::endl;
-using ext::hash;  // or __gnu_cxx::hash, or maybe tr1::hash, depending on your OS
-
-struct eqstr
-{
-  bool operator()(const char* s1, const char* s2) const
-  {
-    return (s1 == s2) || (s1 && s2 && strcmp(s1, s2) == 0);
-  }
-};
-
-void lookup(const hash_set<const char*, hash<const char*>, eqstr>& Set,
-            const char* word)
-{
-  dense_hash_set<const char*, hash<const char*>, eqstr>::const_iterator it
-    = Set.find(word);
-  cout << word << ": "
-       << (it != Set.end() ? "present" : "not present")
-       << endl;
-}
-
-int main()
-{
-  dense_hash_set<const char*, hash<const char*>, eqstr> Set;
-  Set.set_empty_key(NULL);
-  Set.insert("kiwi");
-  Set.insert("plum");
-  Set.insert("apple");
-  Set.insert("mango");
-  Set.insert("apricot");
-  Set.insert("banana");
-
-  lookup(Set, "mango");
-  lookup(Set, "apple");
-  lookup(Set, "durian");
-}
-
- - -

Definition

- -Defined in the header dense_hash_set. -This class is not part of the C++ standard, though it is mostly -compatible with the tr1 class unordered_set. - - -

Template parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParameterDescriptionDefault
- Key - - The hash_set's key and value type. This is also defined as - dense_hash_set::key_type and - dense_hash_set::value_type. - -   -
- HashFcn - - The hash function used by the - hash_set. This is also defined as dense_hash_set::hasher. -
Note: Hashtable performance depends heavliy on the choice of - hash function. See the performance - page for more information. -
- hash<Key> -
- EqualKey - - The hash_set key equality function: a binary predicate that determines - whether two keys are equal. This is also defined as - dense_hash_set::key_equal. - - equal_to<Key> -
- Alloc - - Ignored; this is included only for API-compatibility - with SGI's (and tr1's) STL implementation. - -
- - -

Model of

- -Unique Hashed Associative Container, -Simple Associative Container - - -

Type requirements

- -
    -
  • -Key is Assignable. -
  • -EqualKey is a Binary Predicate whose argument type is Key. -
  • -EqualKey is an equivalence relation. -
  • -Alloc is an Allocator. -
- - -

Public base classes

- -None. - - -

Members

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MemberWhere definedDescription
- value_type - - Container - - The type of object, T, stored in the hash_set. -
- key_type - - Associative - Container - - The key type associated with value_type. -
- hasher - - Hashed - Associative Container - - The dense_hash_set's hash - function. -
- key_equal - - Hashed - Associative Container - - Function - object that compares keys for equality. -
- allocator_type - - Unordered Associative Container (tr1) - - The type of the Allocator given as a template parameter. -
- pointer - - Container - - Pointer to T. -
- reference - - Container - - Reference to T -
- const_reference - - Container - - Const reference to T -
- size_type - - Container - - An unsigned integral type. -
- difference_type - - Container - - A signed integral type. -
- iterator - - Container - - Iterator used to iterate through a dense_hash_set. -
- const_iterator - - Container - - Const iterator used to iterate through a dense_hash_set. - (iterator and const_iterator are the same type.) -
- local_iterator - - Unordered Associative Container (tr1) - - Iterator used to iterate through a subset of - dense_hash_set. -
- const_local_iterator - - Unordered Associative Container (tr1) - - Const iterator used to iterate through a subset of - dense_hash_set. -
- iterator begin() const - - Container - - Returns an iterator pointing to the beginning of the - dense_hash_set. -
- iterator end() const - - Container - - Returns an iterator pointing to the end of the - dense_hash_set. -
- local_iterator begin(size_type i) - - Unordered Associative Container (tr1) - - Returns a local_iterator pointing to the beginning of bucket - i in the dense_hash_set. -
- local_iterator end(size_type i) - - Unordered Associative Container (tr1) - - Returns a local_iterator pointing to the end of bucket - i in the dense_hash_set. For - dense_hash_set, each bucket contains either 0 or 1 item. -
- const_local_iterator begin(size_type i) const - - Unordered Associative Container (tr1) - - Returns a const_local_iterator pointing to the beginning of bucket - i in the dense_hash_set. -
- const_local_iterator end(size_type i) const - - Unordered Associative Container (tr1) - - Returns a const_local_iterator pointing to the end of bucket - i in the dense_hash_set. For - dense_hash_set, each bucket contains either 0 or 1 item. -
- size_type size() const - - Container - - Returns the size of the dense_hash_set. -
- size_type max_size() const - - Container - - Returns the largest possible size of the dense_hash_set. -
- bool empty() const - - Container - - true if the dense_hash_set's size is 0. -
- size_type bucket_count() const - - Hashed - Associative Container - - Returns the number of buckets used by the dense_hash_set. -
- size_type max_bucket_count() const - - Hashed - Associative Container - - Returns the largest possible number of buckets used by the dense_hash_set. -
- size_type bucket_size(size_type i) const - - Unordered Associative Container (tr1) - - Returns the number of elements in bucket i. For - dense_hash_set, this will be either 0 or 1. -
- size_type bucket(const key_type& key) const - - Unordered Associative Container (tr1) - - If the key exists in the map, returns the index of the bucket - containing the given key, otherwise, return the bucket the key - would be inserted into. - This value may be passed to begin(size_type) and - end(size_type). -
- float load_factor() const - - Unordered Associative Container (tr1) - - The number of elements in the dense_hash_set divided by - the number of buckets. -
- float max_load_factor() const - - Unordered Associative Container (tr1) - - The maximum load factor before increasing the number of buckets in - the dense_hash_set. -
- void max_load_factor(float new_grow) - - Unordered Associative Container (tr1) - - Sets the maximum load factor before increasing the number of - buckets in the dense_hash_set. -
- float min_load_factor() const - - dense_hash_set - - The minimum load factor before decreasing the number of buckets in - the dense_hash_set. -
- void min_load_factor(float new_grow) - - dense_hash_set - - Sets the minimum load factor before decreasing the number of - buckets in the dense_hash_set. -
- void set_resizing_parameters(float shrink, float grow) - - dense_hash_set - - DEPRECATED. See below. -
- void resize(size_type n) - - Hashed - Associative Container - - Increases the bucket count to hold at least n items. - [2] [3] -
- void rehash(size_type n) - - Unordered Associative Container (tr1) - - Increases the bucket count to hold at least n items. - This is identical to resize. - [2] [3] -
- hasher hash_funct() const - - Hashed - Associative Container - - Returns the hasher object used by the dense_hash_set. -
- hasher hash_function() const - - Unordered Associative Container (tr1) - - Returns the hasher object used by the dense_hash_set. - This is idential to hash_funct. -
- key_equal key_eq() const - - Hashed - Associative Container - - Returns the key_equal object used by the - dense_hash_set. -
- dense_hash_set() - - Container - - Creates an empty dense_hash_set. -
- dense_hash_set(size_type n) - - Hashed - Associative Container - - Creates an empty dense_hash_set that's optimized for holding - up to n items. - [3] -
- dense_hash_set(size_type n, const hasher& h) - - Hashed - Associative Container - - Creates an empty dense_hash_set that's optimized for up - to n items, using h as the hash function. -
- dense_hash_set(size_type n, const hasher& h, const - key_equal& k) - - Hashed - Associative Container - - Creates an empty dense_hash_set that's optimized for up - to n items, using h as the hash function and - k as the key equal function. -
-
template <class InputIterator>
-dense_hash_set(InputIterator f, InputIterator l) 
-[2] -
- Unique - Hashed Associative Container - - Creates a dense_hash_set with a copy of a range. -
-
template <class InputIterator>
-dense_hash_set(InputIterator f, InputIterator l, size_type n) 
-[2] -
- Unique - Hashed Associative Container - - Creates a hash_set with a copy of a range that's optimized to - hold up to n items. -
-
template <class InputIterator>
-dense_hash_set(InputIterator f, InputIterator l, size_type n, const
-hasher& h) 
[2] -
- Unique - Hashed Associative Container - - Creates a hash_set with a copy of a range that's optimized to hold - up to n items, using h as the hash function. -
-
template <class InputIterator>
-dense_hash_set(InputIterator f, InputIterator l, size_type n, const
-hasher& h, const key_equal& k) 
[2] -
- Unique - Hashed Associative Container - - Creates a hash_set with a copy of a range that's optimized for - holding up to n items, using h as the hash - function and k as the key equal function. -
- dense_hash_set(const hash_set&) - - Container - - The copy constructor. -
- dense_hash_set& operator=(const hash_set&) - - Container - - The assignment operator -
- void swap(hash_set&) - - Container - - Swaps the contents of two hash_sets. -
-
pair<iterator, bool> insert(const value_type& x)
-
-
- Unique - Associative Container - - Inserts x into the dense_hash_set. -
-
template <class InputIterator>
-void insert(InputIterator f, InputIterator l) 
[2] -
- Unique - Associative Container - - Inserts a range into the dense_hash_set. -
- void set_empty_key(const key_type& key) [4] - - dense_hash_set - - See below. -
- void set_deleted_key(const key_type& key) [4] - - dense_hash_set - - See below. -
- void clear_deleted_key() [4] - - dense_hash_set - - See below. -
- void erase(iterator pos) - - Associative - Container - - Erases the element pointed to by pos. - [4] -
- size_type erase(const key_type& k) - - Associative - Container - - Erases the element whose key is k. - [4] -
- void erase(iterator first, iterator last) - - Associative - Container - - Erases all elements in a range. - [4] -
- void clear() - - Associative - Container - - Erases all of the elements. -
- void clear_no_resize() - - dense_hash_map - - See below. -
- iterator find(const key_type& k) const - - Associative - Container - - Finds an element whose key is k. -
- size_type count(const key_type& k) const - - Unique - Associative Container - - Counts the number of elements whose key is k. -
-
pair<iterator, iterator> equal_range(const
-key_type& k) const
-
- Associative - Container - - Finds a range containing all elements whose key is k. -
- bool write_metadata(FILE *fp) - - dense_hash_set - - See below. -
- bool read_metadata(FILE *fp) - - dense_hash_set - - See below. -
- bool write_nopointer_data(FILE *fp) - - dense_hash_set - - See below. -
- bool read_nopointer_data(FILE *fp) - - dense_hash_set - - See below. -
-
bool operator==(const hash_set&, const hash_set&)
-
-
- Hashed - Associative Container - - Tests two hash_sets for equality. This is a global function, not a - member function. -
- - -

New members

- -These members are not defined in the Unique -Hashed Associative Container, Simple -Associative Container, or tr1's Unordered Associative -Container requirements, but are specific to -dense_hash_set. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MemberDescription
- void set_empty_key(const key_type& key) - - Sets the distinguished "empty" key to key. This must be - called immediately after construct time, before calls to another - other dense_hash_set operation. [4] -
- void set_deleted_key(const key_type& key) - - Sets the distinguished "deleted" key to key. This must be - called before any calls to erase(). [4] -
- void clear_deleted_key() - - Clears the distinguished "deleted" key. After this is called, - calls to erase() are not valid on this object. - [4] -
- void clear_no_resize() - - Clears the hashtable like clear() does, but does not - recover the memory used for hashtable buckets. (The memory - used by the items in the hashtable is still recovered.) - This can save time for applications that want to reuse a - dense_hash_set many times, each time with a similar number - of objects. -
- void set_resizing_parameters(float shrink, float grow) - - This function is DEPRECATED. It is equivalent to calling - min_load_factor(shrink); max_load_factor(grow). -
- bool write_metadata(FILE *fp) - - Write hashtable metadata to fp. See below. -
- bool read_metadata(FILE *fp) - - Read hashtable metadata from fp. See below. -
- bool write_nopointer_data(FILE *fp) - - Write hashtable contents to fp. This is valid only if the - hashtable key and value are "plain" data. See below. -
- bool read_nopointer_data(FILE *fp) - - Read hashtable contents to fp. This is valid only if the - hashtable key and value are "plain" data. See below. -
- - -

Notes

- -

[1] - -This member function relies on member template functions, which -may not be supported by all compilers. If your compiler supports -member templates, you can call this function with any type of input -iterator. If your compiler does not yet support member templates, -though, then the arguments must either be of type const -value_type* or of type dense_hash_set::const_iterator.

- -

[2] - -In order to preserve iterators, erasing hashtable elements does not -cause a hashtable to resize. This means that after a string of -erase() calls, the hashtable will use more space than is -required. At a cost of invalidating all current iterators, you can -call resize() to manually compact the hashtable. The -hashtable promotes too-small resize() arguments to the -smallest legal value, so to compact a hashtable, it's sufficient to -call resize(0). - -

[3] - -Unlike some other hashtable implementations, the optional n in -the calls to the constructor, resize, and rehash -indicates not the desired number of buckets that -should be allocated, but instead the expected number of items to be -inserted. The class then sizes the hash-set appropriately for the -number of items specified. It's not an error to actually insert more -or fewer items into the hashtable, but the implementation is most -efficient -- does the fewest hashtable resizes -- if the number of -inserted items is n or slightly less.

- -

[4] - -dense_hash_set requires you call -set_empty_key() immediately after constructing the hash-set, -and before calling any other dense_hash_set method. (This is -the largest difference between the dense_hash_set API and -other hash-set APIs. See implementation.html -for why this is necessary.) -The argument to set_empty_key() should be a key-value that -is never used for legitimate hash-set entries. If you have no such -key value, you will be unable to use dense_hash_set. It is -an error to call insert() with an item whose key is the -"empty key."

- -dense_hash_set also requires you call -set_deleted_key() before calling erase(). -The argument to set_deleted_key() should be a key-value that -is never used for legitimate hash-set entries. It must be different -from the key-value used for set_empty_key(). It is an error to call -erase() without first calling set_deleted_key(), and -it is also an error to call insert() with an item whose key -is the "deleted key."

- -

There is no need to call set_deleted_key if you do not -wish to call erase() on the hash-set.

- -

It is acceptable to change the deleted-key at any time by calling -set_deleted_key() with a new argument. You can also call -clear_deleted_key(), at which point all keys become valid for -insertion but no hashtable entries can be deleted until -set_deleted_key() is called again.

- - -

Input/Output

- -
-

IMPORTANT IMPLEMENTATION NOTE: In the current version of -this code, the input/output routines for dense_hash_set have -not yet been implemented. This section explains the API, but -note that all calls to these routines will fail (return -false). It is a TODO to remedy this situation.

-
- -

It is possible to save and restore dense_hash_set objects -to disk. Storage takes place in two steps. The first writes the -hashtable metadata. The second writes the actual data.

- -

To write a hashtable to disk, first call write_metadata() -on an open file pointer. This saves the hashtable information in a -byte-order-independent format.

- -

After the metadata has been written to disk, you must write the -actual data stored in the hash-set to disk. If both the key and data -are "simple" enough, you can do this by calling -write_nopointer_data(). "Simple" data is data that can be -safely copied to disk via fwrite(). Native C data types fall -into this category, as do structs of native C data types. Pointers -and STL objects do not.

- -

Note that write_nopointer_data() does not do any endian -conversion. Thus, it is only appropriate when you intend to read the -data on the same endian architecture as you write the data.

- -

If you cannot use write_nopointer_data() for any reason, -you can write the data yourself by iterating over the -dense_hash_set with a const_iterator and writing -the key and data in any manner you wish.

- -

To read the hashtable information from disk, first you must create -a dense_hash_set object. Then open a file pointer to point -to the saved hashtable, and call read_metadata(). If you -saved the data via write_nopointer_data(), you can follow the -read_metadata() call with a call to -read_nopointer_data(). This is all that is needed.

- -

If you saved the data through a custom write routine, you must call -a custom read routine to read in the data. To do this, iterate over -the dense_hash_set with an iterator; this operation -is sensical because the metadata has already been set up. For each -iterator item, you can read the key and value from disk, and set it -appropriately. You will need to do a const_cast on the -iterator, since *it is always const. The -code might look like this:

-
-   for (dense_hash_set<int*>::iterator it = ht.begin();
-        it != ht.end(); ++it) {
-       const_cast<int*>(*it) = new int;
-       fread(const_cast<int*>(*it), sizeof(int), 1, fp);
-   }
-
- -

Here's another example, where the item stored in the hash-set is -a C++ object with a non-trivial constructor. In this case, you must -use "placement new" to construct the object at the correct memory -location.

-
-   for (dense_hash_set<ComplicatedClass>::iterator it = ht.begin();
-        it != ht.end(); ++it) {
-       int ctor_arg;  // ComplicatedClass takes an int as its constructor arg
-       fread(&ctor_arg, sizeof(int), 1, fp);
-       new (const_cast<ComplicatedClass*>(&(*it))) ComplicatedClass(ctor_arg);
-   }
-
- - -

Validity of Iterators

- -

erase() is guaranteed not to invalidate any iterators -- -except for any iterators pointing to the item being erased, of course. -insert() invalidates all iterators, as does -resize().

- -

This is implemented by making erase() not resize the -hashtable. If you desire maximum space efficiency, you can call -resize(0) after a string of erase() calls, to force -the hashtable to resize to the smallest possible size.

- -

In addition to invalidating iterators, insert() -and resize() invalidate all pointers into the hashtable. If -you want to store a pointer to an object held in a dense_hash_set, -either do so after finishing hashtable inserts, or store the object on -the heap and a pointer to it in the dense_hash_set.

- - - -

See also

- -

The following are SGI STL, and some Google STL, concepts and -classes related to dense_hash_set.

- -hash_set, -Associative Container, -Hashed Associative Container, -Simple Associative Container, -Unique Hashed Associative Container, -set, -map -multiset, -multimap, -hash_map, -hash_multiset, -hash_multimap, -sparse_hash_set, -sparse_hash_map, -dense_hash_map - - - diff --git a/src/sparsehash-1.6/doc/designstyle.css b/src/sparsehash-1.6/doc/designstyle.css deleted file mode 100644 index f5d1ec2..0000000 --- a/src/sparsehash-1.6/doc/designstyle.css +++ /dev/null @@ -1,115 +0,0 @@ -body { - background-color: #ffffff; - color: black; - margin-right: 1in; - margin-left: 1in; -} - - -h1, h2, h3, h4, h5, h6 { - color: #3366ff; - font-family: sans-serif; -} -@media print { - /* Darker version for printing */ - h1, h2, h3, h4, h5, h6 { - color: #000080; - font-family: helvetica, sans-serif; - } -} - -h1 { - text-align: center; - font-size: 18pt; -} -h2 { - margin-left: -0.5in; -} -h3 { - margin-left: -0.25in; -} -h4 { - margin-left: -0.125in; -} -hr { - margin-left: -1in; -} - -/* Definition lists: definition term bold */ -dt { - font-weight: bold; -} - -address { - text-align: right; -} -/* Use the tag for bits of code and for variables and objects. */ -code,pre,samp,var { - color: #006000; -} -/* Use the tag for file and directory paths and names. */ -file { - color: #905050; - font-family: monospace; -} -/* Use the tag for stuff the user should type. */ -kbd { - color: #600000; -} -div.note p { - float: right; - width: 3in; - margin-right: 0%; - padding: 1px; - border: 2px solid #6060a0; - background-color: #fffff0; -} - -UL.nobullets { - list-style-type: none; - list-style-image: none; - margin-left: -1em; -} - -/* -body:after { - content: "Google Confidential"; -} -*/ - -/* pretty printing styles. 
See prettify.js */ -.str { color: #080; } -.kwd { color: #008; } -.com { color: #800; } -.typ { color: #606; } -.lit { color: #066; } -.pun { color: #660; } -.pln { color: #000; } -.tag { color: #008; } -.atn { color: #606; } -.atv { color: #080; } -pre.prettyprint { padding: 2px; border: 1px solid #888; } - -.embsrc { background: #eee; } - -@media print { - .str { color: #060; } - .kwd { color: #006; font-weight: bold; } - .com { color: #600; font-style: italic; } - .typ { color: #404; font-weight: bold; } - .lit { color: #044; } - .pun { color: #440; } - .pln { color: #000; } - .tag { color: #006; font-weight: bold; } - .atn { color: #404; } - .atv { color: #060; } -} - -/* Table Column Headers */ -.hdr { - color: #006; - font-weight: bold; - background-color: #dddddd; } -.hdr2 { - color: #006; - background-color: #eeeeee; } \ No newline at end of file diff --git a/src/sparsehash-1.6/doc/implementation.html b/src/sparsehash-1.6/doc/implementation.html deleted file mode 100644 index 2050d54..0000000 --- a/src/sparsehash-1.6/doc/implementation.html +++ /dev/null @@ -1,371 +0,0 @@ - - - -Implementation notes: sparse_hash, dense_hash, sparsetable - - - - -

Implementation of sparse_hash_map, dense_hash_map, and -sparsetable

- -This document contains a few notes on how the data structures in this -package are implemented. This discussion refers at several points to -the classic text in this area: Knuth, The Art of Computer -Programming, Vol 3, Hashing. - - -
-

sparsetable

- -

For specificity, consider the declaration

- -
-   sparsetable<Foo> t(100);        // a sparse array with 100 elements
-
- -

A sparsetable is a random container that implements a sparse array, -that is, an array that uses very little memory to store unassigned -indices (in this case, between 1-2 bits per unassigned index). For -instance, if you allocate an array of size 5 and assign a[2] = [big -struct], then a[2] will take up a lot of memory but a[0], a[1], a[3], -and a[4] will not. Array elements that have a value are called -"assigned". Array elements that have no value yet, or have had their -value cleared using erase() or clear(), are called "unassigned". -For assigned elements, lookups return the assigned value; for -unassigned elements, they return the default value, which for t is -Foo().

- -

sparsetable is implemented as an array of "groups". Each group is -responsible for M array indices. The first group knows about -t[0]..t[M-1], the second about t[M]..t[2M-1], and so forth. (M is 48 -by default.) At construct time, t creates an array of (99/M + 1) -groups. From this point on, all operations -- insert, delete, lookup --- are passed to the appropriate group. In particular, any operation -on t[i] is actually performed on (t.group[i / M])[i % M].

- -

Each group contains of a vector, which holds assigned values, and a -bitmap of size M, which indicates which indices are assigned. A -lookup works as follows: the group is asked to look up index i, where -i < M. The group looks at bitmap[i]. If it's 0, the lookup fails. -If it's 1, then the group has to find the appropriate value in the -vector.

- -

find()

- -

Finding the appropriate vector element is the most expensive part of -the lookup. The code counts all bitmap entries <= i that are set to -1. (There's at least 1 of them, since bitmap[i] is 1.) Suppose there -are 4 such entries. Then the right value to return is the 4th element -of the vector: vector[3]. This takes time O(M), which is a constant -since M is a constant.

- -

insert()

- -

Insert starts with a lookup. If the lookup succeeds, the code merely -replaces vector[3] with the new value. If the lookup fails, then the -code must insert a new entry into the middle of the vector. Again, to -insert at position i, the code must count all the bitmap entries <= i -that are set to i. This indicates the position to insert into the -vector. All vector entries above that position must be moved to make -room for the new entry. This takes time, but still constant time -since the vector has size at most M.

- -

(Inserts could be made faster by using a list instead of a vector to -hold group values, but this would use much more memory, since each -list element requires a full pointer of overhead.)

- -

The only metadata that needs to be updated, after the actual value is -inserted, is to set bitmap[i] to 1. No other counts must be -maintained.

- -

delete()

- -

Deletes are similar to inserts. They start with a lookup. If it -fails, the delete is a noop. Otherwise, the appropriate entry is -removed from the vector, all the vector elements above it are moved -down one, and bitmap[i] is set to 0.

- -

iterators

- -

Sparsetable iterators pose a special burden. They must iterate over -unassigned array values, but the act of iterating should not cause an -assignment to happen -- otherwise, iterating over a sparsetable would -cause it to take up much more room. For const iterators, the matter -is simple: the iterator is merely programmed to return the default -value -- Foo() -- when dereferenced while pointing to an unassigned -entry.

- -

For non-const iterators, such simple techniques fail. Instead, -dereferencing a sparsetable_iterator returns an opaque object that -acts like a Foo in almost all situations, but isn't actually a Foo. -(It does this by defining operator=(), operator value_type(), and, -most sneakily, operator&().) This works in almost all cases. If it -doesn't, an explicit cast to value_type will solve the problem:

- -
-   printf("%d", static_cast<Foo>(*t.find(0)));
-
- -

To avoid such problems, consider using get() and set() instead of an -iterator:

- -
-   for (int i = 0; i < t.size(); ++i)
-      if (t.get(i) == ...)  t.set(i, ...);
-
- -

Sparsetable also has a special class of iterator, besides normal and -const: nonempty_iterator. This only iterates over array values that -are assigned. This is particularly fast given the sparsetable -implementation, since it can ignore the bitmaps entirely and just -iterate over the various group vectors.

- -

Resource use

- -

The space overhead for an sparsetable of size N is N + 48N/M bits. -For the default value of M, this is exactly 2 bits per array entry. -(That's for 32-bit pointers; for machines with 64-bit pointers, it's N -+ 80N/M bits, or 2.67 bits per entry.) -A larger M would use less overhead -- approaching 1 bit per array -entry -- but take longer for inserts, deletes, and lookups. A smaller -M would use more overhead but make operations somewhat faster.

- -

You can also look at some specific performance numbers.

- - -
-

sparse_hash_set

- -

For specificity, consider the declaration

- -
-   sparse_hash_set<Foo> t;
-
- -

sparse_hash_set is a hashtable. For more information on hashtables, -see Knuth. Hashtables are basically arrays with complicated logic on -top of them. sparse_hash_set uses a sparsetable to implement the -underlying array.

- -

In particular, sparse_hash_set stores its data in a sparsetable using -quadratic internal probing (see Knuth). Many hashtable -implementations use external probing, so each table element is -actually a pointer chain, holding many hashtable values. -sparse_hash_set, on the other hand, always stores at most one value in -each table location. If the hashtable wants to store a second value -at a given table location, it can't; it's forced to look somewhere -else.

- -

insert()

- -

As a specific example, suppose t is a new sparse_hash_set. It then -holds a sparsetable of size 32. The code for t.insert(foo) works as -follows:

- -

-1) Call hash<Foo>(foo) to convert foo into an integer i. (hash<Foo> is - the default hash function; you can specify a different one in the - template arguments.) - -

-2a) Look at t.sparsetable[i % 32]. If it's unassigned, assign it to - foo. foo is now in the hashtable. - -

-2b) If t.sparsetable[i % 32] is assigned, and its value is foo, then - do nothing: foo was already in t and the insert is a noop. - -

-2c) If t.sparsetable[i % 32] is assigned, but to a value other than - foo, look at t.sparsetable[(i+1) % 32]. If that also fails, try - t.sparsetable[(i+3) % 32], then t.sparsetable[(i+6) % 32]. In - general, keep trying the next triangular number. - -

-3) If the table is now "too full" -- say, 25 of the 32 table entries - are now assigned -- grow the table by creating a new sparsetable - that's twice as big, and rehashing every single element from the - old table into the new one. This keeps the table from ever filling - up. - -

-4) If the table is now "too empty" -- say, only 3 of the 32 table - entries are now assigned -- shrink the table by creating a new - sparsetable that's half as big, and rehashing every element as in - the growing case. This keeps the table overhead proportional to - the number of elements in the table. -

- -

Instead of using triangular numbers as offsets, one could just use -regular integers: try i, then i+1, then i+2, then i+3. This has bad -'clumping' behavior, as explored in Knuth. Quadratic probing, using -the triangular numbers, avoids the clumping while keeping cache -coherency in the common case. As long as the table size is a power of -2, the quadratic-probing method described above will explore every -table element if necessary, to find a good place to insert.

- -

(As a side note, using a table size that's a power of two has several -advantages, including the speed of calculating (i % table_size). On -the other hand, power-of-two tables are not very forgiving of a poor -hash function. Make sure your hash function is a good one! There are -plenty of dos and don'ts on the web (and in Knuth), for writing hash -functions.)

- -

The "too full" value, also called the "maximum occupancy", determines -a time-space tradeoff: in general, the higher it is, the less space is -wasted but the more probes must be performed for each insert. -sparse_hash_set uses a high maximum occupancy, since space is more -important than speed for this data structure.

- -

The "too empty" value is not necessary for performance but helps with -space use. It's rare for hashtable implementations to check this -value at insert() time -- after all, how will inserting cause a -hashtable to get too small? However, the sparse_hash_set -implementation never resizes on erase(); it's nice to have an erase() -that does not invalidate iterators. Thus, the first insert() after a -long string of erase()s could well trigger a hashtable shrink.

- -

find()

- -

find() works similarly to insert. The only difference is in step -(2a): if the value is unassigned, then the lookup fails immediately.

- -

delete()

- -

delete() is tricky in an internal-probing scheme. The obvious -implementation of just "unassigning" the relevant table entry doesn't -work. Consider the following scenario:

- -
-    t.insert(foo1);         // foo1 hashes to 4, is put in table[4]
-    t.insert(foo2);         // foo2 hashes to 4, is put in table[5]
-    t.erase(foo1);          // table[4] is now 'unassigned'
-    t.lookup(foo2);         // fails since table[hash(foo2)] is unassigned
-
- -

To avoid these failure situations, delete(foo1) is actually -implemented by replacing foo1 by a special 'delete' value in the -hashtable. This 'delete' value causes the table entry to be -considered unassigned for the purposes of insertion -- if foo3 hashes -to 4 as well, it can go into table[4] no problem -- but assigned for -the purposes of lookup.

- -

What is this special 'delete' value? The delete value has to be an -element of type Foo, since the table can't hold anything else. It -obviously must be an element the client would never want to insert on -its own, or else the code couldn't distinguish deleted entries from -'real' entries with the same value. There's no way to determine a -good value automatically. The client has to specify it explicitly. -This is what the set_deleted_key() method does.

- -

Note that set_deleted_key() is only necessary if the client actually -wants to call t.erase(). For insert-only hash-sets, set_deleted_key() -is unnecessary.

- -

When copying the hashtable, either to grow it or shrink it, the -special 'delete' values are not copied into the new table. The -copy-time rehash makes them unnecessary.

- -

Resource use

- -

The data is stored in a sparsetable, so space use is the same as -for sparsetable. However, by default the sparse_hash_set -implementation tries to keep about half the table buckets empty, to -keep lookup-chains short. Since sparsetable has about 2 bits -overhead per bucket (or 2.5 bits on 64-bit systems), sparse_hash_map -has about 4-5 bits overhead per hashtable item.

- -

Time use is also determined in large part by the sparsetable -implementation. However, there is also an extra probing cost in -hashtables, which depends in large part on the "too full" value. It -should be rare to need more than 4-5 probes per lookup, and usually -significantly less will suffice.

- -

A note on growing and shrinking the hashtable: all hashtable -implementations use the most memory when growing a hashtable, since -they must have room for both the old table and the new table at the -same time. sparse_hash_set is careful to delete entries from the old -hashtable as soon as they're copied into the new one, to minimize this -space overhead. (It does this efficiently by using its knowledge of -the sparsetable class and copying one sparsetable group at a time.)

- -

You can also look at some specific performance numbers.

- - -
-

sparse_hash_map

- -

sparse_hash_map is implemented identically to sparse_hash_set. The -only difference is instead of storing just Foo in each table entry, -the data structure stores pair<Foo, Value>.

- - -
-

dense_hash_set

- -

The hashtable aspects of dense_hash_set are identical to -sparse_hash_set: it uses quadratic internal probing, and resizes -hashtables in exactly the same way. The difference is in the -underlying array: instead of using a sparsetable, dense_hash_set uses -a C array. This means much more space is used, especially if Foo is -big. However, it makes all operations faster, since sparsetable has -memory management overhead that C arrays do not.

- -

The use of C arrays instead of sparsetables points to one immediate -complication dense_hash_set has that sparse_hash_set does not: the -need to distinguish assigned from unassigned entries. In a -sparsetable, this is accomplished by a bitmap. dense_hash_set, on the -other hand, uses a dedicated value to specify unassigned entries. -Thus, dense_hash_set has two special values: one to indicate deleted -table entries, and one to indicate unassigned table entries. At -construct time, all table entries are initialized to 'unassigned'.

- -

dense_hash_set provides the method set_empty_key() to indicate the -value that should be used for unassigned entries. Like -set_deleted_key(), set_empty_key() requires a value that will not be -used by the client for any legitimate purpose. Unlike -set_deleted_key(), set_empty_key() is always required, no matter what -hashtable operations the client wishes to perform.

- -

Resource use

- -

This implementation is fast because even though dense_hash_set may not -be space efficient, most lookups are localized: a single lookup may -need to access table[i], and maybe table[i+1] and table[i+3], but -nothing other than that. For all but the biggest data structures, -these will frequently be in a single cache line.

- -

This implementation takes, for every unused bucket, space as big as -the key-type. Usually between half and two-thirds of the buckets are -empty.

- -

The doubling method used by dense_hash_set tends to work poorly -with most memory allocators. This is because memory allocators tend -to have memory 'buckets' which are a power of two. Since each -doubling of a dense_hash_set doubles the memory use, a single -hashtable doubling will require a new memory 'bucket' from the memory -allocator, leaving the old bucket stranded as fragmented memory. -Hence, it's not recommended this data structure be used with many -inserts in memory-constrained situations.

- -

You can also look at some specific performance numbers.

- - -
-

dense_hash_map

- -

dense_hash_map is identical to dense_hash_set except for what values -are stored in each table entry.

- -
- -Craig Silverstein
-Thu Jan 6 20:15:42 PST 2005 -
- - - diff --git a/src/sparsehash-1.6/doc/index.html b/src/sparsehash-1.6/doc/index.html deleted file mode 100644 index 68a5865..0000000 --- a/src/sparsehash-1.6/doc/index.html +++ /dev/null @@ -1,69 +0,0 @@ - - - - Google Sparsehash Package - - - - - - - - -

Google Sparsehash Package

-
- -

The Google sparsehash package consists of two hashtable -implementations: sparse, which is designed to be very space -efficient, and dense, which is designed to be very time -efficient. For each one, the package provides both a hash-map and a -hash-set, to mirror the classes in the common STL implementation.

- -

Documentation on how to use these classes:

- - -

In addition to the hash-map (and hash-set) classes, there's also a -lower-level class that implements a "sparse" array. This class can be -useful in its own right; consider using it when you'd normally use a -sparse_hash_map, but your keys are all small-ish -integers.

- - -

There is also a doc explaining the implementation details of these -classes, for those who are curious. And finally, you can see some -performance comparisons, both between -the various classes here, but also between these implementations and -other standard hashtable implementations.

- -
-
-Craig Silverstein
-Last modified: Thu Jan 25 17:58:02 PST 2007 -
- - - diff --git a/src/sparsehash-1.6/doc/performance.html b/src/sparsehash-1.6/doc/performance.html deleted file mode 100644 index 40c1406..0000000 --- a/src/sparsehash-1.6/doc/performance.html +++ /dev/null @@ -1,96 +0,0 @@ - - - -Performance notes: sparse_hash, dense_hash, sparsetable - - - - -

Performance Numbers

- -

Here are some performance numbers from an example desktop machine, -taken from a version of time_hash_map that was instrumented to also -report memory allocation information (this modification is not -included by default because it required a big hack to do, including -modifying the STL code to not try to do its own freelist management).

- -

Note there are lots of caveats on these numbers: they may differ from -machine to machine and compiler to compiler, and they only test a very -particular usage pattern that may not match how you use hashtables -- -for instance, they test hashtables with very small keys. However, -they're still useful for a baseline comparison of the various -hashtable implementations.

- -

These figures are from a 2.80GHz Pentium 4 with 2G of memory. The -'standard' hash_map and map implementations are the SGI STL code -included with gcc2. Compiled with gcc2.95.3 -g --O2

- -
-======
-Average over 10000000 iterations
-Wed Dec  8 14:56:38 PST 2004
-
-SPARSE_HASH_MAP:
-map_grow                  665 ns
-map_predict/grow          303 ns
-map_replace               177 ns
-map_fetch                 117 ns
-map_remove                192 ns
-memory used in map_grow    84.3956 Mbytes
-
-DENSE_HASH_MAP:
-map_grow                   84 ns
-map_predict/grow           22 ns
-map_replace                18 ns
-map_fetch                  13 ns
-map_remove                 23 ns
-memory used in map_grow   256.0000 Mbytes
-
-STANDARD HASH_MAP:
-map_grow                  162 ns
-map_predict/grow          107 ns
-map_replace                44 ns
-map_fetch                  22 ns
-map_remove                124 ns
-memory used in map_grow   204.1643 Mbytes
-
-STANDARD MAP:
-map_grow                  297 ns
-map_predict/grow          282 ns
-map_replace               113 ns
-map_fetch                 113 ns
-map_remove                238 ns
-memory used in map_grow   236.8081 Mbytes
-
- - -

A Note on Hash Functions

- -

For good performance, the Google hash routines depend on a good -hash function: one that distributes data evenly. Many hashtable -implementations come with sub-optimal hash functions that can degrade -performance. For instance, the hash function given in Knuth's _Art of -Computer Programming_, and the default string hash function in SGI's -STL implementation, both distribute certain data sets unevenly, -leading to poor performance.

- -

As an example, in one test of the default SGI STL string hash -function against the Hsieh hash function (see below), for a particular -set of string keys, the Hsieh function resulted in hashtable lookups -that were 20 times as fast as the STLPort hash function. The string -keys were chosen to be "hard" to hash well, so these results may not -be typical, but they are suggestive.

- -

There has been much research over the years into good hash -functions. Here are some hash functions of note.

- - - - - diff --git a/src/sparsehash-1.6/doc/sparse_hash_map.html b/src/sparsehash-1.6/doc/sparse_hash_map.html deleted file mode 100644 index 63055c9..0000000 --- a/src/sparsehash-1.6/doc/sparse_hash_map.html +++ /dev/null @@ -1,1527 +0,0 @@ - - - - - -sparse_hash_map<Key, Data, HashFcn, EqualKey, Alloc> - - - - -

[Note: this document is formatted similarly to the SGI STL -implementation documentation pages, and refers to concepts and classes -defined there. However, neither this document nor the code it -describes is associated with SGI, nor is it necessary to have SGI's -STL implementation installed in order to use this class.]

- - -

sparse_hash_map<Key, Data, HashFcn, EqualKey, Alloc>

- -

sparse_hash_map is a Hashed -Associative Container that associates objects of type Key -with objects of type Data. sparse_hash_map is a Pair -Associative Container, meaning that its value type is pair<const Key, Data>. It is also a -Unique -Associative Container, meaning that no two elements have keys that -compare equal using EqualKey.

- -

Looking up an element in a sparse_hash_map by its key is -efficient, so sparse_hash_map is useful for "dictionaries" -where the order of elements is irrelevant. If it is important for the -elements to be in a particular order, however, then map is more appropriate.

- -

sparse_hash_map is distinguished from other hash-map -implementations by its stingy use of memory and by the ability to save -and restore contents to disk. On the other hand, this hash-map -implementation, while still efficient, is slower than other hash-map -implementations, and it also has requirements -- for instance, for a -distinguished "deleted key" -- that may not be easy for all -applications to satisfy.

- -

This class is appropriate for applications that need to store -large "dictionaries" in memory, or for applications that need these -dictionaries to be persistent.

- - -

Example

- -(Note: this example uses SGI semantics for hash<> --- the kind used by gcc and most Unix compiler suites -- and not -Dinkumware semantics -- the kind used by Microsoft Visual Studio. If -you are using MSVC, this example will not compile as-is: you'll need -to change hash to hash_compare, and you -won't use eqstr at all. See the MSVC documentation for -hash_map and hash_compare, for more -details.) - -
-#include <iostream>
-#include <google/sparse_hash_map>
-
-using google::sparse_hash_map;      // namespace where class lives by default
-using std::cout;
-using std::endl;
-using ext::hash;  // or __gnu_cxx::hash, or maybe tr1::hash, depending on your OS
-
-struct eqstr
-{
-  bool operator()(const char* s1, const char* s2) const
-  {
-    return (s1 == s2) || (s1 && s2 && strcmp(s1, s2) == 0);
-  }
-};
-
-int main()
-{
-  sparse_hash_map<const char*, int, hash<const char*>, eqstr> months;
-  
-  months["january"] = 31;
-  months["february"] = 28;
-  months["march"] = 31;
-  months["april"] = 30;
-  months["may"] = 31;
-  months["june"] = 30;
-  months["july"] = 31;
-  months["august"] = 31;
-  months["september"] = 30;
-  months["october"] = 31;
-  months["november"] = 30;
-  months["december"] = 31;
-  
-  cout << "september -> " << months["september"] << endl;
-  cout << "april     -> " << months["april"] << endl;
-  cout << "june      -> " << months["june"] << endl;
-  cout << "november  -> " << months["november"] << endl;
-}
-
- - -

Definition

- -Defined in the header sparse_hash_map. -This class is not part of the C++ standard, though it is mostly -compatible with the tr1 class unordered_map. - - -

Template parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParameterDescriptionDefault
- Key - - The hash_map's key type. This is also defined as - sparse_hash_map::key_type. - -   -
- Data - - The hash_map's data type. This is also defined as - sparse_hash_map::data_type. - -   -
- HashFcn - - The hash function used by the - hash_map. This is also defined as sparse_hash_map::hasher. -
Note: Hashtable performance depends heavily on the choice of - hash function. See the performance - page for more information. -
- hash<Key> -
- EqualKey - - The hash_map key equality function: a binary predicate that determines - whether two keys are equal. This is also defined as - sparse_hash_map::key_equal. - - equal_to<Key> -
- Alloc - - Ignored; this is included only for API-compatibility - with SGI's (and tr1's) STL implementation. - -
- - -

Model of

- -Unique Hashed Associative Container, -Pair Associative Container - - -

Type requirements

- -
    -
  • -Key is Assignable. -
  • -EqualKey is a Binary Predicate whose argument type is Key. -
  • -EqualKey is an equivalence relation. -
  • -Alloc is an Allocator. -
- - -

Public base classes

- -None. - - -

Members

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MemberWhere definedDescription
- key_type - - Associative - Container - - The sparse_hash_map's key type, Key. -
- data_type - - Pair - Associative Container - - The type of object associated with the keys. -
- value_type - - Pair - Associative Container - - The type of object, pair<const key_type, data_type>, - stored in the hash_map. -
- hasher - - Hashed - Associative Container - - The sparse_hash_map's hash - function. -
- key_equal - - Hashed - Associative Container - - Function - object that compares keys for equality. -
- allocator_type - - Unordered Associative Container (tr1) - - The type of the Allocator given as a template parameter. -
- pointer - - Container - - Pointer to T. -
- reference - - Container - - Reference to T -
- const_reference - - Container - - Const reference to T -
- size_type - - Container - - An unsigned integral type. -
- difference_type - - Container - - A signed integral type. -
- iterator - - Container - - Iterator used to iterate through a sparse_hash_map. [1] -
- const_iterator - - Container - - Const iterator used to iterate through a sparse_hash_map. -
- local_iterator - - Unordered Associative Container (tr1) - - Iterator used to iterate through a subset of - sparse_hash_map. [1] -
- const_local_iterator - - Unordered Associative Container (tr1) - - Const iterator used to iterate through a subset of - sparse_hash_map. -
- iterator begin() - - Container - - Returns an iterator pointing to the beginning of the - sparse_hash_map. -
- iterator end() - - Container - - Returns an iterator pointing to the end of the - sparse_hash_map. -
- const_iterator begin() const - - Container - - Returns an const_iterator pointing to the beginning of the - sparse_hash_map. -
- const_iterator end() const - - Container - - Returns an const_iterator pointing to the end of the - sparse_hash_map. -
- local_iterator begin(size_type i) - - Unordered Associative Container (tr1) - - Returns a local_iterator pointing to the beginning of bucket - i in the sparse_hash_map. -
- local_iterator end(size_type i) - - Unordered Associative Container (tr1) - - Returns a local_iterator pointing to the end of bucket - i in the sparse_hash_map. For - sparse_hash_map, each bucket contains either 0 or 1 item. -
- const_local_iterator begin(size_type i) const - - Unordered Associative Container (tr1) - - Returns a const_local_iterator pointing to the beginning of bucket - i in the sparse_hash_map. -
- const_local_iterator end(size_type i) const - - Unordered Associative Container (tr1) - - Returns a const_local_iterator pointing to the end of bucket - i in the sparse_hash_map. For - sparse_hash_map, each bucket contains either 0 or 1 item. -
- size_type size() const - - Container - - Returns the size of the sparse_hash_map. -
- size_type max_size() const - - Container - - Returns the largest possible size of the sparse_hash_map. -
- bool empty() const - - Container - - true if the sparse_hash_map's size is 0. -
- size_type bucket_count() const - - Hashed - Associative Container - - Returns the number of buckets used by the sparse_hash_map. -
- size_type max_bucket_count() const - - Hashed - Associative Container - - Returns the largest possible number of buckets used by the sparse_hash_map. -
- size_type bucket_size(size_type i) const - - Unordered Associative Container (tr1) - - Returns the number of elements in bucket i. For - sparse_hash_map, this will be either 0 or 1. -
- size_type bucket(const key_type& key) const - - Unordered Associative Container (tr1) - - If the key exists in the map, returns the index of the bucket - containing the given key, otherwise, return the bucket the key - would be inserted into. - This value may be passed to begin(size_type) and - end(size_type). -
- float load_factor() const - - Unordered Associative Container (tr1) - - The number of elements in the sparse_hash_map divided by - the number of buckets. -
- float max_load_factor() const - - Unordered Associative Container (tr1) - - The maximum load factor before increasing the number of buckets in - the sparse_hash_map. -
- void max_load_factor(float new_grow) - - Unordered Associative Container (tr1) - - Sets the maximum load factor before increasing the number of - buckets in the sparse_hash_map. -
- float min_load_factor() const - - sparse_hash_map - - The minimum load factor before decreasing the number of buckets in - the sparse_hash_map. -
- void min_load_factor(float new_grow) - - sparse_hash_map - - Sets the minimum load factor before decreasing the number of - buckets in the sparse_hash_map. -
- void set_resizing_parameters(float shrink, float grow) - - sparse_hash_map - - DEPRECATED. See below. -
- void resize(size_type n) - - Hashed - Associative Container - - Increases the bucket count to hold at least n items. - [4] [5] -
- void rehash(size_type n) - - Unordered Associative Container (tr1) - - Increases the bucket count to hold at least n items. - This is identical to resize. - [4] [5] -
- hasher hash_funct() const - - Hashed - Associative Container - - Returns the hasher object used by the sparse_hash_map. -
- hasher hash_function() const - - Unordered Associative Container (tr1) - - Returns the hasher object used by the sparse_hash_map. - This is identical to hash_funct. -
- key_equal key_eq() const - - Hashed - Associative Container - - Returns the key_equal object used by the - sparse_hash_map. -
- sparse_hash_map() - - Container - - Creates an empty sparse_hash_map. -
- sparse_hash_map(size_type n) - - Hashed - Associative Container - - Creates an empty sparse_hash_map that's optimized for holding - up to n items. - [5] -
- sparse_hash_map(size_type n, const hasher& h) - - Hashed - Associative Container - - Creates an empty sparse_hash_map that's optimized for up - to n items, using h as the hash function. -
- sparse_hash_map(size_type n, const hasher& h, const - key_equal& k) - - Hashed - Associative Container - - Creates an empty sparse_hash_map that's optimized for up - to n items, using h as the hash function and - k as the key equal function. -
-
template <class InputIterator>
-sparse_hash_map(InputIterator f, InputIterator l) 
-[2] -
- Unique - Hashed Associative Container - - Creates a sparse_hash_map with a copy of a range. -
-
template <class InputIterator>
-sparse_hash_map(InputIterator f, InputIterator l, size_type n) 
-[2] -
- Unique - Hashed Associative Container - - Creates a hash_map with a copy of a range that's optimized to - hold up to n items. -
-
template <class InputIterator>
-sparse_hash_map(InputIterator f, InputIterator l, size_type n, const
-hasher& h) 
[2] -
- Unique - Hashed Associative Container - - Creates a hash_map with a copy of a range that's optimized to hold - up to n items, using h as the hash function. -
-
template <class InputIterator>
-sparse_hash_map(InputIterator f, InputIterator l, size_type n, const
-hasher& h, const key_equal& k) 
[2] -
- Unique - Hashed Associative Container - - Creates a hash_map with a copy of a range that's optimized for - holding up to n items, using h as the hash - function and k as the key equal function. -
- sparse_hash_map(const hash_map&) - - Container - - The copy constructor. -
- sparse_hash_map& operator=(const hash_map&) - - Container - - The assignment operator -
- void swap(hash_map&) - - Container - - Swaps the contents of two hash_maps. -
-
pair<iterator, bool> insert(const value_type& x)
-
-
- Unique - Associative Container - - Inserts x into the sparse_hash_map. -
-
template <class InputIterator>
-void insert(InputIterator f, InputIterator l) 
[2] -
- Unique - Associative Container - - Inserts a range into the sparse_hash_map. -
- void set_deleted_key(const key_type& key) [6] - - sparse_hash_map - - See below. -
- void clear_deleted_key() [6] - - sparse_hash_map - - See below. -
- void erase(iterator pos) - - Associative - Container - - Erases the element pointed to by pos. - [6] -
- size_type erase(const key_type& k) - - Associative - Container - - Erases the element whose key is k. - [6] -
- void erase(iterator first, iterator last) - - Associative - Container - - Erases all elements in a range. - [6] -
- void clear() - - Associative - Container - - Erases all of the elements. -
- const_iterator find(const key_type& k) const - - Associative - Container - - Finds an element whose key is k. -
- iterator find(const key_type& k) - - Associative - Container - - Finds an element whose key is k. -
- size_type count(const key_type& k) const - - Unique - Associative Container - - Counts the number of elements whose key is k. -
-
pair<const_iterator, const_iterator> equal_range(const
-key_type& k) const 
-
- Associative - Container - - Finds a range containing all elements whose key is k. -
-
pair<iterator, iterator> equal_range(const
-key_type& k) 
-
- Associative - Container - - Finds a range containing all elements whose key is k. -
-
data_type& operator[](const key_type& k) [3] 
-
- sparse_hash_map - - See below. -
- bool write_metadata(FILE *fp) - - sparse_hash_map - - See below. -
- bool read_metadata(FILE *fp) - - sparse_hash_map - - See below. -
- bool write_nopointer_data(FILE *fp) - - sparse_hash_map - - See below. -
- bool read_nopointer_data(FILE *fp) - - sparse_hash_map - - See below. -
-
bool operator==(const hash_map&, const hash_map&)
-
-
- Hashed - Associative Container - - Tests two hash_maps for equality. This is a global function, not a - member function. -
- - -

New members

- -These members are not defined in the Unique -Hashed Associative Container, Pair -Associative Container, or tr1's -Unordered Associative Container requirements, -but are specific to sparse_hash_map. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MemberDescription
- void set_deleted_key(const key_type& key) - - Sets the distinguished "deleted" key to key. This must be - called before any calls to erase(). [6] -
- void clear_deleted_key() - - Clears the distinguished "deleted" key. After this is called, - calls to erase() are not valid on this object. - [6] -
-
-data_type& 
-operator[](const key_type& k) [3]
-
-
- Returns a reference to the object that is associated with - a particular key. If the sparse_hash_map does not already - contain such an object, operator[] inserts the default - object data_type(). [3] -
- void set_resizing_parameters(float shrink, float grow) - - This function is DEPRECATED. It is equivalent to calling - min_load_factor(shrink); max_load_factor(grow). -
- bool write_metadata(FILE *fp) - - Write hashtable metadata to fp. See below. -
- bool read_metadata(FILE *fp) - - Read hashtable metadata from fp. See below. -
- bool write_nopointer_data(FILE *fp) - - Write hashtable contents to fp. This is valid only if the - hashtable key and value are "plain" data. See below. -
- bool read_nopointer_data(FILE *fp) - - Read hashtable contents to fp. This is valid only if the - hashtable key and value are "plain" data. See below. -
- - -

Notes

- -

[1] - -sparse_hash_map::iterator is not a mutable iterator, because -sparse_hash_map::value_type is not Assignable. -That is, if i is of type sparse_hash_map::iterator -and p is of type sparse_hash_map::value_type, then -*i = p is not a valid expression. However, -sparse_hash_map::iterator isn't a constant iterator either, -because it can be used to modify the object that it points to. Using -the same notation as above, (*i).second = p is a valid -expression.

- -

[2] - -This member function relies on member template functions, which -may not be supported by all compilers. If your compiler supports -member templates, you can call this function with any type of input -iterator. If your compiler does not yet support member templates, -though, then the arguments must either be of type const -value_type* or of type sparse_hash_map::const_iterator.

- -

[3] - -Since operator[] might insert a new element into the -sparse_hash_map, it can't possibly be a const member -function. Note that the definition of operator[] is -extremely simple: m[k] is equivalent to -(*((m.insert(value_type(k, data_type()))).first)).second. -Strictly speaking, this member function is unnecessary: it exists only -for convenience.

- -

[4] - -In order to preserve iterators, erasing hashtable elements does not -cause a hashtable to resize. This means that after a string of -erase() calls, the hashtable will use more space than is -required. At a cost of invalidating all current iterators, you can -call resize() to manually compact the hashtable. The -hashtable promotes too-small resize() arguments to the -smallest legal value, so to compact a hashtable, it's sufficient to -call resize(0). - -

[5] - -Unlike some other hashtable implementations, the optional n in -the calls to the constructor, resize, and rehash -indicates not the desired number of buckets that -should be allocated, but instead the expected number of items to be -inserted. The class then sizes the hash-map appropriately for the -number of items specified. It's not an error to actually insert more -or fewer items into the hashtable, but the implementation is most -efficient -- does the fewest hashtable resizes -- if the number of -inserted items is n or slightly less.

- -

[6] - -sparse_hash_map requires you call -set_deleted_key() before calling erase(). (This is -the largest difference between the sparse_hash_map API and -other hash-map APIs. See implementation.html -for why this is necessary.) -The argument to set_deleted_key() should be a key-value that -is never used for legitimate hash-map entries. It is an error to call -erase() without first calling set_deleted_key(), and -it is also an error to call insert() with an item whose key -is the "deleted key."

- -

There is no need to call set_deleted_key if you do not -wish to call erase() on the hash-map.

- -

It is acceptable to change the deleted-key at any time by calling -set_deleted_key() with a new argument. You can also call -clear_deleted_key(), at which point all keys become valid for -insertion but no hashtable entries can be deleted until -set_deleted_key() is called again.

- -

Note: If you use set_deleted_key, it is also -necessary that data_type has a zero-argument default -constructor. This is because sparse_hash_map uses the -special value pair(deleted_key, data_type()) to denote -deleted buckets, and thus needs to be able to create -data_type using a zero-argument constructor.

- -

If your data_type does not have a zero-argument default -constructor, there are several workarounds:

-
    -
  • Store a pointer to data_type in the map, instead of - data_type directly. This may yield faster code as - well, since hashtable-resizes will just have to move pointers - around, rather than copying the entire data_type. -
  • Add a zero-argument default constructor to data_type. -
  • Subclass data_type and add a zero-argument default - constructor to the subclass. -
- -

If you do not use set_deleted_key, then there is no -requirement that data_type have a zero-argument default -constructor. - - -

Input/Output

- -

It is possible to save and restore sparse_hash_map objects -to disk. Storage takes place in two steps. The first writes the -hashtable metadata. The second writes the actual data.

- -

To write a hashtable to disk, first call write_metadata() -on an open file pointer. This saves the hashtable information in a -byte-order-independent format.

- -

After the metadata has been written to disk, you must write the -actual data stored in the hash-map to disk. If both the key and data -are "simple" enough, you can do this by calling -write_nopointer_data(). "Simple" data is data that can be -safely copied to disk via fwrite(). Native C data types fall -into this category, as do structs of native C data types. Pointers -and STL objects do not.

- -

Note that write_nopointer_data() does not do any endian -conversion. Thus, it is only appropriate when you intend to read the -data on the same endian architecture as you write the data.

- -

If you cannot use write_nopointer_data() for any reason, -you can write the data yourself by iterating over the -sparse_hash_map with a const_iterator and writing -the key and data in any manner you wish.

- -

To read the hashtable information from disk, first you must create -a sparse_hash_map object. Then open a file pointer to point -to the saved hashtable, and call read_metadata(). If you -saved the data via write_nopointer_data(), you can follow the -read_metadata() call with a call to -read_nopointer_data(). This is all that is needed.

- -

If you saved the data through a custom write routine, you must call -a custom read routine to read in the data. To do this, iterate over -the sparse_hash_map with an iterator; this operation -is sensical because the metadata has already been set up. For each -iterator item, you can read the key and value from disk, and set it -appropriately. You will need to do a const_cast on the -iterator, since it->first is always const. You -will also need to use placement-new if the key or value is a C++ -object. The code might look like this:

-
-   for (sparse_hash_map<int*, ComplicatedClass>::iterator it = ht.begin();
-        it != ht.end(); ++it) {
-       // The key is stored in the sparse_hash_map as a pointer
-       const_cast<int*>(it->first) = new int;
-       fread(const_cast<int*>(it->first), sizeof(int), 1, fp);
-       // The value is a complicated C++ class that takes an int to construct
-       int ctor_arg;
-       fread(&ctor_arg, sizeof(int), 1, fp);
-       new (&it->second) ComplicatedClass(ctor_arg);  // "placement new"
-   }
-
- - -

Validity of Iterators

- -

erase() is guaranteed not to invalidate any iterators -- -except for any iterators pointing to the item being erased, of course. -insert() invalidates all iterators, as does -resize().

- -

This is implemented by making erase() not resize the -hashtable. If you desire maximum space efficiency, you can call -resize(0) after a string of erase() calls, to force -the hashtable to resize to the smallest possible size.

- -

In addition to invalidating iterators, insert() -and resize() invalidate all pointers into the hashtable. If -you want to store a pointer to an object held in a sparse_hash_map, -either do so after finishing hashtable inserts, or store the object on -the heap and a pointer to it in the sparse_hash_map.

- - -

See also

- -

The following are SGI STL, and some Google STL, concepts and -classes related to sparse_hash_map.

- -hash_map, -Associative Container, -Hashed Associative Container, -Pair Associative Container, -Unique Hashed Associative Container, -set, -map -multiset, -multimap, -hash_set, -hash_multiset, -hash_multimap, -sparsetable, -sparse_hash_set, -dense_hash_set, -dense_hash_map - - - diff --git a/src/sparsehash-1.6/doc/sparse_hash_set.html b/src/sparsehash-1.6/doc/sparse_hash_set.html deleted file mode 100644 index 70c7721..0000000 --- a/src/sparsehash-1.6/doc/sparse_hash_set.html +++ /dev/null @@ -1,1376 +0,0 @@ - - - - - -sparse_hash_set<Key, HashFcn, EqualKey, Alloc> - - - - -

[Note: this document is formatted similarly to the SGI STL -implementation documentation pages, and refers to concepts and classes -defined there. However, neither this document nor the code it -describes is associated with SGI, nor is it necessary to have SGI's -STL implementation installed in order to use this class.]

- - -

sparse_hash_set<Key, HashFcn, EqualKey, Alloc>

- -

sparse_hash_set is a Hashed -Associative Container that stores objects of type Key. -sparse_hash_set is a Simple -Associative Container, meaning that its value type, as well as its -key type, is key. It is also a -Unique -Associative Container, meaning that no two elements have keys that -compare equal using EqualKey.

- -

Looking up an element in a sparse_hash_set by its key is -efficient, so sparse_hash_set is useful for "dictionaries" -where the order of elements is irrelevant. If it is important for the -elements to be in a particular order, however, then map is more appropriate.

- -

sparse_hash_set is distinguished from other hash-set -implementations by its stingy use of memory and by the ability to save -and restore contents to disk. On the other hand, this hash-set -implementation, while still efficient, is slower than other hash-set -implementations, and it also has requirements -- for instance, for a -distinguished "deleted key" -- that may not be easy for all -applications to satisfy.

- -

This class is appropriate for applications that need to store -large "dictionaries" in memory, or for applications that need these -dictionaries to be persistent.

- - -

Example

- -(Note: this example uses SGI semantics for hash<> --- the kind used by gcc and most Unix compiler suites -- and not -Dinkumware semantics -- the kind used by Microsoft Visual Studio. If -you are using MSVC, this example will not compile as-is: you'll need -to change hash to hash_compare, and you -won't use eqstr at all. See the MSVC documentation for -hash_map and hash_compare, for more -details.) - -
-#include <iostream>
-#include <google/sparse_hash_set>
-
-using google::sparse_hash_set;      // namespace where class lives by default
-using std::cout;
-using std::endl;
-using ext::hash;  // or __gnu_cxx::hash, or maybe tr1::hash, depending on your OS
-
-struct eqstr
-{
-  bool operator()(const char* s1, const char* s2) const
-  {
-    return (s1 == s2) || (s1 && s2 && strcmp(s1, s2) == 0);
-  }
-};
-
-void lookup(const hash_set<const char*, hash<const char*>, eqstr>& Set,
-            const char* word)
-{
-  sparse_hash_set<const char*, hash<const char*>, eqstr>::const_iterator it
-    = Set.find(word);
-  cout << word << ": "
-       << (it != Set.end() ? "present" : "not present")
-       << endl;
-}
-
-int main()
-{
-  sparse_hash_set<const char*, hash<const char*>, eqstr> Set;
-  Set.insert("kiwi");
-  Set.insert("plum");
-  Set.insert("apple");
-  Set.insert("mango");
-  Set.insert("apricot");
-  Set.insert("banana");
-
-  lookup(Set, "mango");
-  lookup(Set, "apple");
-  lookup(Set, "durian");
-}
-
- - -

Definition

- -Defined in the header sparse_hash_set. -This class is not part of the C++ standard, though it is mostly -compatible with the tr1 class unordered_set. - - -

Template parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParameterDescriptionDefault
- Key - - The hash_set's key and value type. This is also defined as - sparse_hash_set::key_type and - sparse_hash_set::value_type. - -   -
- HashFcn - - The hash function used by the - hash_set. This is also defined as sparse_hash_set::hasher. -
Note: Hashtable performance depends heavliy on the choice of - hash function. See the performance - page for more information. -
- hash<Key> -
- EqualKey - - The hash_set key equality function: a binary predicate that determines - whether two keys are equal. This is also defined as - sparse_hash_set::key_equal. - - equal_to<Key> -
- Alloc - - Ignored; this is included only for API-compatibility - with SGI's (and tr1's) STL implementation. - -
- - -

Model of

- -Unique Hashed Associative Container, -Simple Associative Container - - -

Type requirements

- -
    -
  • -Key is Assignable. -
  • -EqualKey is a Binary Predicate whose argument type is Key. -
  • -EqualKey is an equivalence relation. -
  • -Alloc is an Allocator. -
- - -

Public base classes

- -None. - - -

Members

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MemberWhere definedDescription
- value_type - - Container - - The type of object, T, stored in the hash_set. -
- key_type - - Associative - Container - - The key type associated with value_type. -
- hasher - - Hashed - Associative Container - - The sparse_hash_set's hash - function. -
- key_equal - - Hashed - Associative Container - - Function - object that compares keys for equality. -
- allocator_type - - Unordered Associative Container (tr1) - - The type of the Allocator given as a template parameter. -
- pointer - - Container - - Pointer to T. -
- reference - - Container - - Reference to T -
- const_reference - - Container - - Const reference to T -
- size_type - - Container - - An unsigned integral type. -
- difference_type - - Container - - A signed integral type. -
- iterator - - Container - - Iterator used to iterate through a sparse_hash_set. -
- const_iterator - - Container - - Const iterator used to iterate through a sparse_hash_set. - (iterator and const_iterator are the same type.) -
- local_iterator - - Unordered Associative Container (tr1) - - Iterator used to iterate through a subset of - sparse_hash_set. -
- const_local_iterator - - Unordered Associative Container (tr1) - - Const iterator used to iterate through a subset of - sparse_hash_set. -
- iterator begin() const - - Container - - Returns an iterator pointing to the beginning of the - sparse_hash_set. -
- iterator end() const - - Container - - Returns an iterator pointing to the end of the - sparse_hash_set. -
- local_iterator begin(size_type i) - - Unordered Associative Container (tr1) - - Returns a local_iterator pointing to the beginning of bucket - i in the sparse_hash_set. -
- local_iterator end(size_type i) - - Unordered Associative Container (tr1) - - Returns a local_iterator pointing to the end of bucket - i in the sparse_hash_set. For - sparse_hash_set, each bucket contains either 0 or 1 item. -
- const_local_iterator begin(size_type i) const - - Unordered Associative Container (tr1) - - Returns a const_local_iterator pointing to the beginning of bucket - i in the sparse_hash_set. -
- const_local_iterator end(size_type i) const - - Unordered Associative Container (tr1) - - Returns a const_local_iterator pointing to the end of bucket - i in the sparse_hash_set. For - sparse_hash_set, each bucket contains either 0 or 1 item. -
- size_type size() const - - Container - - Returns the size of the sparse_hash_set. -
- size_type max_size() const - - Container - - Returns the largest possible size of the sparse_hash_set. -
- bool empty() const - - Container - - true if the sparse_hash_set's size is 0. -
- size_type bucket_count() const - - Hashed - Associative Container - - Returns the number of buckets used by the sparse_hash_set. -
- size_type max_bucket_count() const - - Hashed - Associative Container - - Returns the largest possible number of buckets used by the sparse_hash_set. -
- size_type bucket_size(size_type i) const - - Unordered Associative Container (tr1) - - Returns the number of elements in bucket i. For - sparse_hash_set, this will be either 0 or 1. -
- size_type bucket(const key_type& key) const - - Unordered Associative Container (tr1) - - If the key exists in the map, returns the index of the bucket - containing the given key, otherwise, return the bucket the key - would be inserted into. - This value may be passed to begin(size_type) and - end(size_type). -
- float load_factor() const - - Unordered Associative Container (tr1) - - The number of elements in the sparse_hash_set divided by - the number of buckets. -
- float max_load_factor() const - - Unordered Associative Container (tr1) - - The maximum load factor before increasing the number of buckets in - the sparse_hash_set. -
- void max_load_factor(float new_grow) - - Unordered Associative Container (tr1) - - Sets the maximum load factor before increasing the number of - buckets in the sparse_hash_set. -
- float min_load_factor() const - - sparse_hash_set - - The minimum load factor before decreasing the number of buckets in - the sparse_hash_set. -
- void min_load_factor(float new_grow) - - sparse_hash_set - - Sets the minimum load factor before decreasing the number of - buckets in the sparse_hash_set. -
- void set_resizing_parameters(float shrink, float grow) - - sparse_hash_set - - DEPRECATED. See below. -
- void resize(size_type n) - - Hashed - Associative Container - - Increases the bucket count to hold at least n items. - [2] [3] -
- void rehash(size_type n) - - Unordered Associative Container (tr1) - - Increases the bucket count to hold at least n items. - This is identical to resize. - [2] [3] -
- hasher hash_funct() const - - Hashed - Associative Container - - Returns the hasher object used by the sparse_hash_set. -
- hasher hash_function() const - - Unordered Associative Container (tr1) - - Returns the hasher object used by the sparse_hash_set. - This is idential to hash_funct. -
- key_equal key_eq() const - - Hashed - Associative Container - - Returns the key_equal object used by the - sparse_hash_set. -
- sparse_hash_set() - - Container - - Creates an empty sparse_hash_set. -
- sparse_hash_set(size_type n) - - Hashed - Associative Container - - Creates an empty sparse_hash_set that's optimized for holding - up to n items. - [3] -
- sparse_hash_set(size_type n, const hasher& h) - - Hashed - Associative Container - - Creates an empty sparse_hash_set that's optimized for up - to n items, using h as the hash function. -
- sparse_hash_set(size_type n, const hasher& h, const - key_equal& k) - - Hashed - Associative Container - - Creates an empty sparse_hash_set that's optimized for up - to n items, using h as the hash function and - k as the key equal function. -
-
template <class InputIterator>
-sparse_hash_set(InputIterator f, InputIterator l) 
-[2] -
- Unique - Hashed Associative Container - - Creates a sparse_hash_set with a copy of a range. -
-
template <class InputIterator>
-sparse_hash_set(InputIterator f, InputIterator l, size_type n) 
-[2] -
- Unique - Hashed Associative Container - - Creates a hash_set with a copy of a range that's optimized to - hold up to n items. -
-
template <class InputIterator>
-sparse_hash_set(InputIterator f, InputIterator l, size_type n, const
-hasher& h) 
[2] -
- Unique - Hashed Associative Container - - Creates a hash_set with a copy of a range that's optimized to hold - up to n items, using h as the hash function. -
-
template <class InputIterator>
-sparse_hash_set(InputIterator f, InputIterator l, size_type n, const
-hasher& h, const key_equal& k) 
[2] -
- Unique - Hashed Associative Container - - Creates a hash_set with a copy of a range that's optimized for - holding up to n items, using h as the hash - function and k as the key equal function. -
- sparse_hash_set(const hash_set&) - - Container - - The copy constructor. -
- sparse_hash_set& operator=(const hash_set&) - - Container - - The assignment operator -
- void swap(hash_set&) - - Container - - Swaps the contents of two hash_sets. -
-
pair<iterator, bool> insert(const value_type& x)
-
-
- Unique - Associative Container - - Inserts x into the sparse_hash_set. -
-
template <class InputIterator>
-void insert(InputIterator f, InputIterator l) 
[2] -
- Unique - Associative Container - - Inserts a range into the sparse_hash_set. -
- void set_deleted_key(const key_type& key) [4] - - sparse_hash_set - - See below. -
- void clear_deleted_key() [4] - - sparse_hash_set - - See below. -
- void erase(iterator pos) - - Associative - Container - - Erases the element pointed to by pos. - [4] -
- size_type erase(const key_type& k) - - Associative - Container - - Erases the element whose key is k. - [4] -
- void erase(iterator first, iterator last) - - Associative - Container - - Erases all elements in a range. - [4] -
- void clear() - - Associative - Container - - Erases all of the elements. -
- iterator find(const key_type& k) const - - Associative - Container - - Finds an element whose key is k. -
- size_type count(const key_type& k) const - - Unique - Associative Container - - Counts the number of elements whose key is k. -
-
pair<iterator, iterator> equal_range(const
-key_type& k) const
-
- Associative - Container - - Finds a range containing all elements whose key is k. -
- bool write_metadata(FILE *fp) - - sparse_hash_set - - See below. -
- bool read_metadata(FILE *fp) - - sparse_hash_set - - See below. -
- bool write_nopointer_data(FILE *fp) - - sparse_hash_set - - See below. -
- bool read_nopointer_data(FILE *fp) - - sparse_hash_set - - See below. -
-
bool operator==(const hash_set&, const hash_set&)
-
-
- Hashed - Associative Container - - Tests two hash_sets for equality. This is a global function, not a - member function. -
- - -

New members

- -These members are not defined in the Unique -Hashed Associative Container, Simple -Associative Container, or tr1's Unordered Associative -Container requirements, but are specific to -sparse_hash_set. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MemberDescription
- void set_deleted_key(const key_type& key) - - Sets the distinguished "deleted" key to key. This must be - called before any calls to erase(). [4] -
- void clear_deleted_key() - - Clears the distinguished "deleted" key. After this is called, - calls to erase() are not valid on this object. - [4] -
- void set_resizing_parameters(float shrink, float grow) - - This function is DEPRECATED. It is equivalent to calling - min_load_factor(shrink); max_load_factor(grow). -
- bool write_metadata(FILE *fp) - - Write hashtable metadata to fp. See below. -
- bool read_metadata(FILE *fp) - - Read hashtable metadata from fp. See below. -
- bool write_nopointer_data(FILE *fp) - - Write hashtable contents to fp. This is valid only if the - hashtable key and value are "plain" data. See below. -
- bool read_nopointer_data(FILE *fp) - - Read hashtable contents to fp. This is valid only if the - hashtable key and value are "plain" data. See below. -
- - -

Notes

- -

[1] - -This member function relies on member template functions, which -may not be supported by all compilers. If your compiler supports -member templates, you can call this function with any type of input -iterator. If your compiler does not yet support member templates, -though, then the arguments must either be of type const -value_type* or of type sparse_hash_set::const_iterator.

- -

[2] - -In order to preserve iterators, erasing hashtable elements does not -cause a hashtable to resize. This means that after a string of -erase() calls, the hashtable will use more space than is -required. At a cost of invalidating all current iterators, you can -call resize() to manually compact the hashtable. The -hashtable promotes too-small resize() arguments to the -smallest legal value, so to compact a hashtable, it's sufficient to -call resize(0). - -

[3] - -Unlike some other hashtable implementations, the optional n in -the calls to the constructor, resize, and rehash -indicates not the desired number of buckets that -should be allocated, but instead the expected number of items to be -inserted. The class then sizes the hash-set appropriately for the -number of items specified. It's not an error to actually insert more -or fewer items into the hashtable, but the implementation is most -efficient -- does the fewest hashtable resizes -- if the number of -inserted items is n or slightly less.

- -

[4] - -sparse_hash_set requires you call -set_deleted_key() before calling erase(). (This is -the largest difference between the sparse_hash_set API and -other hash-set APIs. See implementation.html -for why this is necessary.) -The argument to set_deleted_key() should be a key-value that -is never used for legitimate hash-set entries. It is an error to call -erase() without first calling set_deleted_key(), and -it is also an error to call insert() with an item whose key -is the "deleted key."

- -

There is no need to call set_deleted_key if you do not -wish to call erase() on the hash-set.

- -

It is acceptable to change the deleted-key at any time by calling -set_deleted_key() with a new argument. You can also call -clear_deleted_key(), at which point all keys become valid for -insertion but no hashtable entries can be deleted until -set_deleted_key() is called again.

- - -

Input/Output

- -

It is possible to save and restore sparse_hash_set objects -to disk. Storage takes place in two steps. The first writes the -hashtable metadata. The second writes the actual data.

- -

To write a hashtable to disk, first call write_metadata() -on an open file pointer. This saves the hashtable information in a -byte-order-independent format.

- -

After the metadata has been written to disk, you must write the -actual data stored in the hash-set to disk. If both the key and data -are "simple" enough, you can do this by calling -write_nopointer_data(). "Simple" data is data that can be -safely copied to disk via fwrite(). Native C data types fall -into this category, as do structs of native C data types. Pointers -and STL objects do not.

- -

Note that write_nopointer_data() does not do any endian -conversion. Thus, it is only appropriate when you intend to read the -data on the same endian architecture as you write the data.

- -

If you cannot use write_nopointer_data() for any reason, -you can write the data yourself by iterating over the -sparse_hash_set with a const_iterator and writing -the key and data in any manner you wish.

- -

To read the hashtable information from disk, first you must create -a sparse_hash_set object. Then open a file pointer to point -to the saved hashtable, and call read_metadata(). If you -saved the data via write_nopointer_data(), you can follow the -read_metadata() call with a call to -read_nopointer_data(). This is all that is needed.

- -

If you saved the data through a custom write routine, you must call -a custom read routine to read in the data. To do this, iterate over -the sparse_hash_set with an iterator; this operation -is sensical because the metadata has already been set up. For each -iterator item, you can read the key and value from disk, and set it -appropriately. You will need to do a const_cast on the -iterator, since *it is always const. The -code might look like this:

-
-   for (sparse_hash_set<int*>::iterator it = ht.begin();
-        it != ht.end(); ++it) {
-       const_cast<int*>(*it) = new int;
-       fread(const_cast<int*>(*it), sizeof(int), 1, fp);
-   }
-
- -

Here's another example, where the item stored in the hash-set is -a C++ object with a non-trivial constructor. In this case, you must -use "placement new" to construct the object at the correct memory -location.

-
-   for (sparse_hash_set<ComplicatedClass>::iterator it = ht.begin();
-        it != ht.end(); ++it) {
-       int ctor_arg;  // ComplicatedClass takes an int as its constructor arg
-       fread(&ctor_arg, sizeof(int), 1, fp);
-       new (const_cast<ComplicatedClass*>(&(*it))) ComplicatedClass(ctor_arg);
-   }
-
- - -

Validity of Iterators

- -

erase() is guaranteed not to invalidate any iterators -- -except for any iterators pointing to the item being erased, of course. -insert() invalidates all iterators, as does -resize().

- -

This is implemented by making erase() not resize the -hashtable. If you desire maximum space efficiency, you can call -resize(0) after a string of erase() calls, to force -the hashtable to resize to the smallest possible size.

- -

In addition to invalidating iterators, insert() -and resize() invalidate all pointers into the hashtable. If -you want to store a pointer to an object held in a sparse_hash_set, -either do so after finishing hashtable inserts, or store the object on -the heap and a pointer to it in the sparse_hash_set.

- - -

See also

- -

The following are SGI STL, and some Google STL, concepts and -classes related to sparse_hash_set.

- -hash_set, -Associative Container, -Hashed Associative Container, -Simple Associative Container, -Unique Hashed Associative Container, -set, -map -multiset, -multimap, -hash_map, -hash_multiset, -hash_multimap, -sparsetable, -sparse_hash_map, -dense_hash_set, -dense_hash_map - - - diff --git a/src/sparsehash-1.6/doc/sparsetable.html b/src/sparsehash-1.6/doc/sparsetable.html deleted file mode 100644 index d8c8364..0000000 --- a/src/sparsehash-1.6/doc/sparsetable.html +++ /dev/null @@ -1,1393 +0,0 @@ - - - - - -sparsetable<T, GROUP_SIZE> - - - - -

[Note: this document is formatted similarly to the SGI STL -implementation documentation pages, and refers to concepts and classes -defined there. However, neither this document nor the code it -describes is associated with SGI, nor is it necessary to have SGI's -STL implementation installed in order to use this class.]

- -

sparsetable<T, GROUP_SIZE>

- -

A sparsetable is a Random -Access Container that supports constant time random access to -elements, and constant time insertion and removal of elements. It -implements the "array" or "table" abstract data type. The number of -elements in a sparsetable is set at constructor time, though -you can change it at any time by calling resize().

- -

sparsetable is distinguished from other array -implementations, including the default C implementation, in its stingy -use of memory -- in particular, unused array elements require only 1 bit -of disk space to store, rather than sizeof(T) bytes -- and by -the ability to save and restore contents to disk. On the other hand, -this array implementation, while still efficient, is slower than other -array implementations.

- - -

A sparsetable distinguishes between table elements that -have been assigned and those that are unassigned. -Assigned table elements are those that have had a value set via -set(), operator(), assignment via an iterator, and -so forth. Unassigned table elements are those that have not had a -value set in one of these ways, or that have been explicitly -unassigned via a call to erase() or clear(). Lookup -is valid on both assigned and unassigned table elements; for -unassigned elements, lookup returns the default value -T().

-
- -

This class is appropriate for applications that need to store large -arrays in memory, or for applications that need these arrays to be -persistent.

- - -

Example

- -
-#include <google/sparsetable>
-
-using google::sparsetable;      // namespace where class lives by default
-
-sparsetable<int> t(100);
-t[5] = 6;
-cout << "t[5] = " << t[5];
-cout << "Default value = " << t[99];
-
- - -

Definition

- -Defined in the header sparsetable. This -class is not part of the C++ standard. - - -

Template parameters

- - - - - - - - - - - - - - - - -
ParameterDescriptionDefault
- T - - The sparsetable's value type: the type of object that is stored in - the table. - -   -
- GROUP_SIZE - - The number of elements in each sparsetable group (see the implementation doc for more details - on this value). This almost never need be specified; the default - template parameter value works well in all situations. - -   -
- - -

Model of

- -Random Access Container - - -

Type requirements

- -None, except for those imposed by the requirements of -Random -Access Container - - -

Public base classes

- -None. - - -

Members

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MemberWhere definedDescription
- value_type - - Container - - The type of object, T, stored in the table. -
- pointer - - Container - - Pointer to T. -
- reference - - Container - - Reference to T. -
- const_reference - - Container - - Const reference to T. -
- size_type - - Container - - An unsigned integral type. -
- difference_type - - Container - - A signed integral type. -
- iterator - - Container - - Iterator used to iterate through a sparsetable. -
- const_iterator - - Container - - Const iterator used to iterate through a sparsetable. -
- reverse_iterator - - Reversible - Container - - Iterator used to iterate backwards through a sparsetable. -
- const_reverse_iterator - - Reversible - Container - - Const iterator used to iterate backwards through a - sparsetable. -
- nonempty_iterator - - sparsetable - - Iterator used to iterate through the - assigned elements of the - sparsetable. -
- const_nonempty_iterator - - sparsetable - - Const iterator used to iterate through the - assigned elements of the - sparsetable. -
- reverse_nonempty_iterator - - sparsetable - - Iterator used to iterate backwards through the - assigned elements of the - sparsetable. -
- const_reverse_nonempty_iterator - - sparsetable - - Const iterator used to iterate backwards through the - assigned elements of the - sparsetable. -
- destructive_iterator - - sparsetable - - Iterator used to iterate through the - assigned elements of the - sparsetable, erasing elements as it iterates. - [1] -
- iterator begin() - - Container - - Returns an iterator pointing to the beginning of the - sparsetable. -
- iterator end() - - Container - - Returns an iterator pointing to the end of the - sparsetable. -
- const_iterator begin() const - - Container - - Returns an const_iterator pointing to the beginning of the - sparsetable. -
- const_iterator end() const - - Container - - Returns an const_iterator pointing to the end of the - sparsetable. -
- reverse_iterator rbegin() - - Reversible - Container - - Returns a reverse_iterator pointing to the beginning of the - reversed sparsetable. -
- reverse_iterator rend() - - Reversible - Container - - Returns a reverse_iterator pointing to the end of the - reversed sparsetable. -
- const_reverse_iterator rbegin() const - - Reversible - Container - - Returns a const_reverse_iterator pointing to the beginning - of the reversed sparsetable. -
- const_reverse_iterator rend() const - - Reversible - Container - - Returns a const_reverse_iterator pointing to the end of - the reversed sparsetable. -
- nonempty_iterator nonempty_begin() - - sparsetable - - Returns a nonempty_iterator pointing to the first - assigned element of the - sparsetable. -
- nonempty_iterator nonempty_end() - - sparsetable - - Returns a nonempty_iterator pointing to the end of the - sparsetable. -
- const_nonempty_iterator nonempty_begin() const - - sparsetable - - Returns a const_nonempty_iterator pointing to the first - assigned element of the - sparsetable. -
- const_nonempty_iterator nonempty_end() const - - sparsetable - - Returns a const_nonempty_iterator pointing to the end of - the sparsetable. -
- reverse_nonempty_iterator nonempty_rbegin() - - sparsetable - - Returns a reverse_nonempty_iterator pointing to the first - assigned element of the reversed - sparsetable. -
- reverse_nonempty_iterator nonempty_rend() - - sparsetable - - Returns a reverse_nonempty_iterator pointing to the end of - the reversed sparsetable. -
- const_reverse_nonempty_iterator nonempty_rbegin() const - - sparsetable - - Returns a const_reverse_nonempty_iterator pointing to the - first assigned element of the reversed - sparsetable. -
- const_reverse_nonempty_iterator nonempty_rend() const - - sparsetable - - Returns a const_reverse_nonempty_iterator pointing to the - end of the reversed sparsetable. -
- destructive_iterator destructive_begin() - - sparsetable - - Returns a destructive_iterator pointing to the first - assigned element of the - sparsetable. -
- destructive_iterator destructive_end() - - sparsetable - - Returns a destructive_iterator pointing to the end of - the sparsetable. -
- size_type size() const - - Container - - Returns the size of the sparsetable. -
- size_type max_size() const - - Container - - Returns the largest possible size of the sparsetable. -
- bool empty() const - - Container - - true if the sparsetable's size is 0. -
- size_type num_nonempty() const - - sparsetable - - Returns the number of sparsetable elements that are currently assigned. -
- sparsetable(size_type n) - - Container - - Creates a sparsetable with n elements. -
- sparsetable(const sparsetable&) - - Container - - The copy constructor. -
- ~sparsetable() - - Container - - The destructor. -
- sparsetable& operator=(const sparsetable&) - - Container - - The assignment operator -
- void swap(sparsetable&) - - Container - - Swaps the contents of two sparsetables. -
- reference operator[](size_type n) - - Random - Access Container - - Returns the n'th element. [2] -
- const_reference operator[](size_type n) const - - Random - Access Container - - Returns the n'th element. -
- bool test(size_type i) const - - sparsetable - - true if the i'th element of the sparsetable is assigned. -
- bool test(iterator pos) const - - sparsetable - - true if the sparsetable element pointed to by pos - is assigned. -
- bool test(const_iterator pos) const - - sparsetable - - true if the sparsetable element pointed to by pos - is assigned. -
- const_reference get(size_type i) const - - sparsetable - - returns the i'th element of the sparsetable. -
- reference set(size_type i, const_reference val) - - sparsetable - - Sets the i'th element of the sparsetable to value - val. -
- void erase(size_type i) - - sparsetable - - Erases the i'th element of the sparsetable. -
- void erase(iterator pos) - - sparsetable - - Erases the element of the sparsetable pointed to by - pos. -
- void erase(iterator first, iterator last) - - sparsetable - - Erases the elements of the sparsetable in the range - [first, last). -
- void clear() - - sparsetable - - Erases all of the elements. -
- void resize(size_type n) - - sparsetable - - Changes the size of sparsetable to n. -
- bool write_metadata(FILE *fp) - - sparsetable - - See below. -
- bool read_metadata(FILE *fp) - - sparsetable - - See below. -
- bool write_nopointer_data(FILE *fp) - - sparsetable - - See below. -
- bool read_nopointer_data(FILE *fp) - - sparsetable - - See below. -
-
bool operator==(const sparsetable&, const sparsetable&)
-
-
- Forward - Container - - Tests two sparsetables for equality. This is a global function, - not a member function. -
-
bool operator<(const sparsetable&, const sparsetable&)
-
-
- Forward - Container - - Lexicographical comparison. This is a global function, - not a member function. -
- - -

New members

- -These members are not defined in the Random -Access Container requirement, but are specific to -sparsetable. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MemberDescription
- nonempty_iterator - - Iterator used to iterate through the - assigned elements of the - sparsetable. -
- const_nonempty_iterator - - Const iterator used to iterate through the - assigned elements of the - sparsetable. -
- reverse_nonempty_iterator - - Iterator used to iterate backwards through the - assigned elements of the - sparsetable. -
- const_reverse_nonempty_iterator - - Const iterator used to iterate backwards through the - assigned elements of the - sparsetable. -
- destructive_iterator - - Iterator used to iterate through the - assigned elements of the - sparsetable, erasing elements as it iterates. - [1] -
- nonempty_iterator nonempty_begin() - - Returns a nonempty_iterator pointing to the first - assigned element of the - sparsetable. -
- nonempty_iterator nonempty_end() - - Returns a nonempty_iterator pointing to the end of the - sparsetable. -
- const_nonempty_iterator nonempty_begin() const - - Returns a const_nonempty_iterator pointing to the first - assigned element of the - sparsetable. -
- const_nonempty_iterator nonempty_end() const - - Returns a const_nonempty_iterator pointing to the end of - the sparsetable. -
- reverse_nonempty_iterator nonempty_rbegin() - - Returns a reverse_nonempty_iterator pointing to the first - assigned element of the reversed - sparsetable. -
- reverse_nonempty_iterator nonempty_rend() - - Returns a reverse_nonempty_iterator pointing to the end of - the reversed sparsetable. -
- const_reverse_nonempty_iterator nonempty_rbegin() const - - Returns a const_reverse_nonempty_iterator pointing to the - first assigned element of the reversed - sparsetable. -
- const_reverse_nonempty_iterator nonempty_rend() const - - Returns a const_reverse_nonempty_iterator pointing to the - end of the reversed sparsetable. -
- destructive_iterator destructive_begin() - - Returns a destructive_iterator pointing to the first - assigned element of the - sparsetable. -
- destructive_iterator destructive_end() - - Returns a destructive_iterator pointing to the end of - the sparsetable. -
- size_type num_nonempty() const - - Returns the number of sparsetable elements that are currently assigned. -
- bool test(size_type i) const - - true if the i'th element of the sparsetable is assigned. -
- bool test(iterator pos) const - - true if the sparsetable element pointed to by pos - is assigned. -
- bool test(const_iterator pos) const - - true if the sparsetable element pointed to by pos - is assigned. -
- const_reference get(size_type i) const - - returns the i'th element of the sparsetable. If - the i'th element is assigned, the - assigned value is returned, otherwise, the default value - T() is returned. -
- reference set(size_type i, const_reference val) - - Sets the i'th element of the sparsetable to value - val, and returns a reference to the i'th element - of the table. This operation causes the i'th element to - be assigned. -
- void erase(size_type i) - - Erases the i'th element of the sparsetable. This - operation causes the i'th element to be unassigned. -
- void erase(iterator pos) - - Erases the element of the sparsetable pointed to by - pos. This operation causes the i'th element to - be unassigned. -
- void erase(iterator first, iterator last) - - Erases the elements of the sparsetable in the range - [first, last). This operation causes these elements to - be unassigned. -
- void clear() - - Erases all of the elements. This causes all elements to be - unassigned. -
- void resize(size_type n) - - Changes the size of sparsetable to n. If n is - greater than the old size, new, unassigned - elements are appended. If n is less than the old size, - all elements in position >n are deleted. -
- bool write_metadata(FILE *fp) - - Write hashtable metadata to fp. See below. -
- bool read_metadata(FILE *fp) - - Read hashtable metadata from fp. See below. -
- bool write_nopointer_data(FILE *fp) - - Write hashtable contents to fp. This is valid only if the - hashtable key and value are "plain" data. See below. -
- bool read_nopointer_data(FILE *fp) - - Read hashtable contents to fp. This is valid only if the - hashtable key and value are "plain" data. See below. -
- - -

Notes

- -

[1] - -sparsetable::destructive_iterator iterates through a -sparsetable like a normal iterator, but ++it may delete the -element being iterated past. Obviously, this iterator can only be -used once on a given table! One application of this iterator is to -copy data from a sparsetable to some other data structure without -using extra memory to store the data in both places during the -copy.

- -

[2] - -Since operator[] might insert a new element into the -sparsetable, it can't possibly be a const member -function. In theory, since it might insert a new element, it should -cause the element it refers to to become assigned. However, this is undesirable when -operator[] is used to examine elements, rather than assign -them. Thus, as an implementation trick, operator[] does not -really return a reference. Instead it returns an object that -behaves almost exactly like a reference. This object, -however, delays setting the appropriate sparsetable element to assigned to when it is actually assigned to.

- -

For a bit more detail: the object returned by operator[] -is an opaque type which defines operator=, operator -reference(), and operator&. The first operator controls -assigning to the value. The second controls examining the value. The -third controls pointing to the value.

- -

All three operators perform exactly as an object of type -reference would perform. The only problems that arise is -when this object is accessed in situations where C++ cannot do the -conversion by default. By far the most common situation is with -variadic functions such as printf. In such situations, you -may need to manually cast the object to the right type:

-
-   printf("%d", static_cast<typename table::reference>(table[i]));
-
- - -

Input/Output

- -

It is possible to save and restore sparsetable objects -to disk. Storage takes place in two steps. The first writes the -table metadata. The second writes the actual data.

- -

To write a sparsetable to disk, first call write_metadata() -on an open file pointer. This saves the sparsetable information in a -byte-order-independent format.

- -

After the metadata has been written to disk, you must write the -actual data stored in the sparsetable to disk. If the value is -"simple" enough, you can do this by calling -write_nopointer_data(). "Simple" data is data that can be -safely copied to disk via fwrite(). Native C data types fall -into this category, as do structs of native C data types. Pointers -and STL objects do not.

- -

Note that write_nopointer_data() does not do any endian -conversion. Thus, it is only appropriate when you intend to read the -data on the same endian architecture as you write the data.

- -

If you cannot use write_nopointer_data() for any reason, -you can write the data yourself by iterating over the -sparsetable with a const_nonempty_iterator and -writing the key and data in any manner you wish.

- -

To read the hashtable information from disk, first you must create -a sparsetable object. Then open a file pointer to point -to the saved sparsetable, and call read_metadata(). If you -saved the data via write_nopointer_data(), you can follow the -read_metadata() call with a call to -read_nopointer_data(). This is all that is needed.

- -

If you saved the data through a custom write routine, you must call -a custom read routine to read in the data. To do this, iterate over -the sparsetable with a nonempty_iterator; this -operation is sensical because the metadata has already been set up. -For each iterator item, you can read the key and value from disk, and -set it appropriately. The code might look like this:

-
-   for (sparsetable<int*>::nonempty_iterator it = t.nonempty_begin();
-        it != t.nonempty_end(); ++it) {
-       *it = new int;
-       fread(*it, sizeof(int), 1, fp);
-   }
-
- -

Here's another example, where the item stored in the sparsetable is -a C++ object with a non-trivial constructor. In this case, you must -use "placement new" to construct the object at the correct memory -location.

-
-   for (sparsetable<ComplicatedCppClass>::nonempty_iterator it = t.nonempty_begin();
-        it != t.nonempty_end(); ++it) {
-       int constructor_arg;   // ComplicatedCppClass takes an int to construct
-       fread(&constructor_arg, sizeof(int), 1, fp);
-       new (&(*it)) ComplicatedCppClass(constructor_arg);     // placement new
-   }
-
- - -

See also

- -

The following are SGI STL concepts and classes related to -sparsetable.

- -Container, -Random Access Container, -sparse_hash_set, -sparse_hash_map - - - diff --git a/src/sparsehash-1.6/experimental/Makefile b/src/sparsehash-1.6/experimental/Makefile deleted file mode 100644 index aa997f7..0000000 --- a/src/sparsehash-1.6/experimental/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -example: example.o libchash.o - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ - -.SUFFIXES: .c .o .h -.c.o: - $(CC) -c $(CPPFLAGS) $(CFLAGS) -o $@ $< - -example.o: example.c libchash.h -libchash.o: libchash.c libchash.h diff --git a/src/sparsehash-1.6/experimental/README b/src/sparsehash-1.6/experimental/README deleted file mode 100644 index 150161d..0000000 --- a/src/sparsehash-1.6/experimental/README +++ /dev/null @@ -1,14 +0,0 @@ -This is a C version of sparsehash (and also, maybe, densehash) that I -wrote way back when, and served as the inspiration for the C++ -version. The API for the C version is much uglier than the C++, -because of the lack of template support. I believe the class works, -but I'm not convinced it's really flexible or easy enough to use. - -It would be nice to rework this C class to follow the C++ API as -closely as possible (eg have a set_deleted_key() instead of using a -#define like this code does now). I believe the code compiles and -runs, if anybody is interested in using it now, but it's subject to -major change in the future, as people work on it. 
- -Craig Silverstein -20 March 2005 diff --git a/src/sparsehash-1.6/experimental/example.c b/src/sparsehash-1.6/experimental/example.c deleted file mode 100644 index 38a3265..0000000 --- a/src/sparsehash-1.6/experimental/example.c +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include -#include -#include "libchash.h" - -static void TestInsert() { - struct HashTable* ht; - HTItem* bck; - - ht = AllocateHashTable(1, 0); /* value is 1 byte, 0: don't copy keys */ - - HashInsert(ht, PTR_KEY(ht, "January"), 31); /* 0: don't overwrite old val */ - bck = HashInsert(ht, PTR_KEY(ht, "February"), 28); - bck = HashInsert(ht, PTR_KEY(ht, "March"), 31); - - bck = HashFind(ht, PTR_KEY(ht, "February")); - assert(bck); - assert(bck->data == 28); - - FreeHashTable(ht); -} - -static void TestFindOrInsert() { - struct HashTable* ht; - int i; - int iterations = 1000000; - int range = 30; /* random number between 1 and 30 */ - - ht = AllocateHashTable(4, 0); /* value is 4 bytes, 0: don't copy keys */ - - /* We'll test how good rand() is as a random number generator */ - for (i = 0; i < iterations; ++i) { - int key = rand() % range; - HTItem* bck = HashFindOrInsert(ht, key, 0); /* initialize to 0 */ - bck->data++; /* found one more of them */ - } - - for (i = 0; i < range; ++i) { - HTItem* bck = HashFind(ht, i); - if (bck) { - printf("%3d: %d\n", bck->key, bck->data); - } else { - printf("%3d: 0\n", i); - } - } - - FreeHashTable(ht); -} - -int main(int argc, char** argv) { - TestInsert(); - TestFindOrInsert(); - return 0; -} diff --git a/src/sparsehash-1.6/experimental/libchash.c b/src/sparsehash-1.6/experimental/libchash.c deleted file mode 100644 index eff9eeb..0000000 --- a/src/sparsehash-1.6/experimental/libchash.c +++ /dev/null @@ -1,1537 +0,0 @@ -/* Copyright (c) 1998 - 2005, Google Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Craig Silverstein - * - * This library is intended to be used for in-memory hash tables, - * though it provides rudimentary permanent-storage capabilities. - * It attempts to be fast, portable, and small. The best algorithm - * to fulfill these goals is an internal probing hashing algorithm, - * as in Knuth, _Art of Computer Programming_, vol III. 
Unlike - * chained (open) hashing, it doesn't require a pointer for every - * item, yet it is still constant time lookup in practice. - * - * Also to save space, we let the contents (both data and key) that - * you insert be a union: if the key/data is small, we store it - * directly in the hashtable, otherwise we store a pointer to it. - * To keep you from having to figure out which, use KEY_PTR and - * PTR_KEY to convert between the arguments to these functions and - * a pointer to the real data. For instance: - * char key[] = "ab", *key2; - * HTItem *bck; HashTable *ht; - * HashInsert(ht, PTR_KEY(ht, key), 0); - * bck = HashFind(ht, PTR_KEY(ht, "ab")); - * key2 = KEY_PTR(ht, bck->key); - * - * There are a rich set of operations supported: - * AllocateHashTable() -- Allocates a hashtable structure and - * returns it. - * cchKey: if it's a positive number, then each key is a - * fixed-length record of that length. If it's 0, - * the key is assumed to be a \0-terminated string. - * fSaveKey: normally, you are responsible for allocating - * space for the key. If this is 1, we make a - * copy of the key for you. - * ClearHashTable() -- Removes everything from a hashtable - * FreeHashTable() -- Frees memory used by a hashtable - * - * HashFind() -- takes a key (use PTR_KEY) and returns the - * HTItem containing that key, or NULL if the - * key is not in the hashtable. - * HashFindLast() -- returns the item found by last HashFind() - * HashFindOrInsert() -- inserts the key/data pair if the key - * is not already in the hashtable, or - * returns the appropraite HTItem if it is. - * HashFindOrInsertItem() -- takes key/data as an HTItem. - * HashInsert() -- adds a key/data pair to the hashtable. What - * it does if the key is already in the table - * depends on the value of SAMEKEY_OVERWRITE. - * HashInsertItem() -- takes key/data as an HTItem. - * HashDelete() -- removes a key/data pair from the hashtable, - * if it's there. RETURNS 1 if it was there, - * 0 else. 
- * If you use sparse tables and never delete, the full data - * space is available. Otherwise we steal -2 (maybe -3), - * so you can't have data fields with those values. - * HashDeleteLast() -- deletes the item returned by the last Find(). - * - * HashFirstBucket() -- used to iterate over the buckets in a - * hashtable. DON'T INSERT OR DELETE WHILE - * ITERATING! You can't nest iterations. - * HashNextBucket() -- RETURNS NULL at the end of iterating. - * - * HashSetDeltaGoalSize() -- if you're going to insert 1000 items - * at once, call this fn with arg 1000. - * It grows the table more intelligently. - * - * HashSave() -- saves the hashtable to a file. It saves keys ok, - * but it doesn't know how to interpret the data field, - * so if the data field is a pointer to some complex - * structure, you must send a function that takes a - * file pointer and a pointer to the structure, and - * write whatever you want to write. It should return - * the number of bytes written. If the file is NULL, - * it should just return the number of bytes it would - * write, without writing anything. - * If your data field is just an integer, not a - * pointer, just send NULL for the function. - * HashLoad() -- loads a hashtable. It needs a function that takes - * a file and the size of the structure, and expects - * you to read in the structure and return a pointer - * to it. You must do memory allocation, etc. If - * the data is just a number, send NULL. - * HashLoadKeys() -- unlike HashLoad(), doesn't load the data off disk - * until needed. This saves memory, but if you look - * up the same key a lot, it does a disk access each - * time. - * You can't do Insert() or Delete() on hashtables that were loaded - * from disk. - * - * See libchash.h for parameters you can modify. Make sure LOG_WORD_SIZE - * is defined correctly for your machine! (5 for 32 bit words, 6 for 64). 
- */ - -#include -#include -#include /* for strcmp, memcmp, etc */ -#include /* ULTRIX needs this for in.h */ -#include /* for reading/writing hashtables */ -#include -#include "libchash.h" /* all the types */ - - /* if keys are stored directly but cchKey is less than sizeof(ulong), */ - /* this cuts off the bits at the end */ -char grgKeyTruncMask[sizeof(ulong)][sizeof(ulong)]; -#define KEY_TRUNC(ht, key) \ - ( STORES_PTR(ht) || (ht)->cchKey == sizeof(ulong) \ - ? (key) : ((key) & *(ulong *)&(grgKeyTruncMask[(ht)->cchKey][0])) ) - - /* round num up to a multiple of wordsize. (LOG_WORD_SIZE-3 is in bytes) */ -#define WORD_ROUND(num) ( ((num-1) | ((1<<(LOG_WORD_SIZE-3))-1)) + 1 ) -#define NULL_TERMINATED 0 /* val of cchKey if keys are null-term strings */ - - /* Useful operations we do to keys: compare them, copy them, free them */ - -#define KEY_CMP(ht, key1, key2) ( !STORES_PTR(ht) ? (key1) - (key2) : \ - (key1) == (key2) ? 0 : \ - HashKeySize(ht) == NULL_TERMINATED ? \ - strcmp((char *)key1, (char *)key2) :\ - memcmp((void *)key1, (void *)key2, \ - HashKeySize(ht)) ) - -#define COPY_KEY(ht, keyTo, keyFrom) do \ - if ( !STORES_PTR(ht) || !(ht)->fSaveKeys ) \ - (keyTo) = (keyFrom); /* just copy pointer or info */\ - else if ( (ht)->cchKey == NULL_TERMINATED ) /* copy 0-term.ed str */\ - { \ - (keyTo) = (ulong)HTsmalloc( WORD_ROUND(strlen((char *)(keyFrom))+1) ); \ - strcpy((char *)(keyTo), (char *)(keyFrom)); \ - } \ - else \ - { \ - (keyTo) = (ulong) HTsmalloc( WORD_ROUND((ht)->cchKey) ); \ - memcpy( (char *)(keyTo), (char *)(keyFrom), (ht)->cchKey); \ - } \ - while ( 0 ) - -#define FREE_KEY(ht, key) do \ - if ( STORES_PTR(ht) && (ht)->fSaveKeys ) \ - if ( (ht)->cchKey == NULL_TERMINATED ) \ - HTfree((char *)(key), WORD_ROUND(strlen((char *)(key))+1)); \ - else \ - HTfree((char *)(key), WORD_ROUND((ht)->cchKey)); \ - while ( 0 ) - - /* the following are useful for bitmaps */ - /* Format is like this (if 1 word = 4 bits): 3210 7654 ba98 fedc ... 
*/ -typedef ulong HTBitmapPart; /* this has to be unsigned, for >> */ -typedef HTBitmapPart HTBitmap[1<> LOG_WORD_SIZE) << (LOG_WORD_SIZE-3) ) -#define MOD2(i, logmod) ( (i) & ((1<<(logmod))-1) ) -#define DIV_NUM_ENTRIES(i) ( (i) >> LOG_WORD_SIZE ) -#define MOD_NUM_ENTRIES(i) ( MOD2(i, LOG_WORD_SIZE) ) -#define MODBIT(i) ( ((ulong)1) << MOD_NUM_ENTRIES(i) ) - -#define TEST_BITMAP(bm, i) ( (bm)[DIV_NUM_ENTRIES(i)] & MODBIT(i) ? 1 : 0 ) -#define SET_BITMAP(bm, i) (bm)[DIV_NUM_ENTRIES(i)] |= MODBIT(i) -#define CLEAR_BITMAP(bm, i) (bm)[DIV_NUM_ENTRIES(i)] &= ~MODBIT(i) - - /* the following are useful for reading and writing hashtables */ -#define READ_UL(fp, data) \ - do { \ - long _ul; \ - fread(&_ul, sizeof(_ul), 1, (fp)); \ - data = ntohl(_ul); \ - } while (0) - -#define WRITE_UL(fp, data) \ - do { \ - long _ul = htonl((long)(data)); \ - fwrite(&_ul, sizeof(_ul), 1, (fp)); \ - } while (0) - - /* Moves data from disk to memory if necessary. Note dataRead cannot be * - * NULL, because then we might as well (and do) load the data into memory */ -#define LOAD_AND_RETURN(ht, loadCommand) /* lC returns an HTItem * */ \ - if ( !(ht)->fpData ) /* data is stored in memory */ \ - return (loadCommand); \ - else /* must read data off of disk */ \ - { \ - int cchData; \ - HTItem *bck; \ - if ( (ht)->bckData.data ) free((char *)(ht)->bckData.data); \ - ht->bckData.data = (ulong)NULL; /* needed if loadCommand fails */ \ - bck = (loadCommand); \ - if ( bck == NULL ) /* loadCommand failed: key not found */ \ - return NULL; \ - else \ - (ht)->bckData = *bck; \ - fseek(ht->fpData, (ht)->bckData.data, SEEK_SET); \ - READ_UL((ht)->fpData, cchData); \ - (ht)->bckData.data = (ulong)(ht)->dataRead((ht)->fpData, cchData); \ - return &((ht)->bckData); \ - } - - -/* ======================================================================== */ -/* UTILITY ROUTINES */ -/* ---------------------- */ - -/* HTsmalloc() -- safe malloc - * allocates memory, or crashes if the allocation fails. 
- */ -static void *HTsmalloc(unsigned long size) -{ - void *retval; - - if ( size == 0 ) - return NULL; - retval = (void *)malloc(size); - if ( !retval ) - { - fprintf(stderr, "HTsmalloc: Unable to allocate %lu bytes of memory\n", - size); - exit(1); - } - return retval; -} - -/* HTscalloc() -- safe calloc - * allocates memory and initializes it to 0, or crashes if - * the allocation fails. - */ -static void *HTscalloc(unsigned long size) -{ - void *retval; - - retval = (void *)calloc(size, 1); - if ( !retval && size > 0 ) - { - fprintf(stderr, "HTscalloc: Unable to allocate %lu bytes of memory\n", - size); - exit(1); - } - return retval; -} - -/* HTsrealloc() -- safe calloc - * grows the amount of memory from a source, or crashes if - * the allocation fails. - */ -static void *HTsrealloc(void *ptr, unsigned long new_size, long delta) -{ - if ( ptr == NULL ) - return HTsmalloc(new_size); - ptr = realloc(ptr, new_size); - if ( !ptr && new_size > 0 ) - { - fprintf(stderr, "HTsrealloc: Unable to reallocate %lu bytes of memory\n", - new_size); - exit(1); - } - return ptr; -} - -/* HTfree() -- keep track of memory use - * frees memory using free, but updates count of how much memory - * is being used. - */ -static void HTfree(void *ptr, unsigned long size) -{ - if ( size > 0 ) /* some systems seem to not like freeing NULL */ - free(ptr); -} - -/*************************************************************************\ -| HTcopy() | -| Sometimes we interpret data as a ulong. But ulongs must be | -| aligned on some machines, so instead of casting we copy. | -\*************************************************************************/ - -unsigned long HTcopy(char *ul) -{ - unsigned long retval; - - memcpy(&retval, ul, sizeof(retval)); - return retval; -} - -/*************************************************************************\ -| HTSetupKeyTrunc() | -| If keys are stored directly but cchKey is less than | -| sizeof(ulong), this cuts off the bits at the end. 
| -\*************************************************************************/ - -static void HTSetupKeyTrunc(void) -{ - int i, j; - - for ( i = 0; i < sizeof(unsigned long); i++ ) - for ( j = 0; j < sizeof(unsigned long); j++ ) - grgKeyTruncMask[i][j] = j < i ? 255 : 0; /* chars have 8 bits */ -} - - -/* ======================================================================== */ -/* TABLE ROUTINES */ -/* -------------------- */ - -/* The idea is that a hashtable with (logically) t buckets is divided - * into t/M groups of M buckets each. (M is a constant set in - * LOG_BM_WORDS for efficiency.) Each group is stored sparsely. - * Thus, inserting into the table causes some array to grow, which is - * slow but still constant time. Lookup involves doing a - * logical-position-to-sparse-position lookup, which is also slow but - * constant time. The larger M is, the slower these operations are - * but the less overhead (slightly). - * - * To store the sparse array, we store a bitmap B, where B[i] = 1 iff - * bucket i is non-empty. Then to look up bucket i we really look up - * array[# of 1s before i in B]. This is constant time for fixed M. - * - * Terminology: the position of an item in the overall table (from - * 1 .. t) is called its "location." The logical position in a group - * (from 1 .. M ) is called its "position." The actual location in - * the array (from 1 .. # of non-empty buckets in the group) is - * called its "offset." 
- * - * The following operations are supported: - * o Allocate an array with t buckets, all empty - * o Free a array (but not whatever was stored in the buckets) - * o Tell whether or not a bucket is empty - * o Return a bucket with a given location - * o Set the value of a bucket at a given location - * o Iterate through all the buckets in the array - * o Read and write an occupancy bitmap to disk - * o Return how much memory is being allocated by the array structure - */ - -#ifndef SparseBucket /* by default, each bucket holds an HTItem */ -#define SparseBucket HTItem -#endif - -typedef struct SparseBin { - SparseBucket *binSparse; - HTBitmap bmOccupied; /* bmOccupied[i] is 1 if bucket i has an item */ - short cOccupied; /* size of binSparse; useful for iterators, eg */ -} SparseBin; - -typedef struct SparseIterator { - long posGroup; - long posOffset; - SparseBin *binSparse; /* state info, to avoid args for NextBucket() */ - ulong cBuckets; -} SparseIterator; - -#define LOG_LOW_BIN_SIZE ( LOG_BM_WORDS+LOG_WORD_SIZE ) -#define SPARSE_GROUPS(cBuckets) ( (((cBuckets)-1) >> LOG_LOW_BIN_SIZE) + 1 ) - - /* we need a small function to figure out # of items set in the bm */ -static HTOffset EntriesUpto(HTBitmapPart *bm, int i) -{ /* returns # of set bits in 0..i-1 */ - HTOffset retval = 0; - static HTOffset rgcBits[256] = /* # of bits set in one char */ - {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 
3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; - - if ( i == 0 ) return 0; - for ( ; i > sizeof(*bm)*8; i -= sizeof(*bm)*8, bm++ ) - { /* think of it as loop unrolling */ -#if LOG_WORD_SIZE >= 3 /* 1 byte per word, or more */ - retval += rgcBits[*bm & 255]; /* get the low byte */ -#if LOG_WORD_SIZE >= 4 /* at least 2 bytes */ - retval += rgcBits[(*bm >> 8) & 255]; -#if LOG_WORD_SIZE >= 5 /* at least 4 bytes */ - retval += rgcBits[(*bm >> 16) & 255]; - retval += rgcBits[(*bm >> 24) & 255]; -#if LOG_WORD_SIZE >= 6 /* 8 bytes! */ - retval += rgcBits[(*bm >> 32) & 255]; - retval += rgcBits[(*bm >> 40) & 255]; - retval += rgcBits[(*bm >> 48) & 255]; - retval += rgcBits[(*bm >> 56) & 255]; -#if LOG_WORD_SIZE >= 7 /* not a concern for a while... */ -#error Need to rewrite EntriesUpto to support such big words -#endif /* >8 bytes */ -#endif /* 8 bytes */ -#endif /* 4 bytes */ -#endif /* 2 bytes */ -#endif /* 1 byte */ - } - switch ( i ) { /* from 0 to 63 */ - case 0: - return retval; -#if LOG_WORD_SIZE >= 3 /* 1 byte per word, or more */ - case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8: - return (retval + rgcBits[*bm & ((1 << i)-1)]); -#if LOG_WORD_SIZE >= 4 /* at least 2 bytes */ - case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16: - return (retval + rgcBits[*bm & 255] + - rgcBits[(*bm >> 8) & ((1 << (i-8))-1)]); -#if LOG_WORD_SIZE >= 5 /* at least 4 bytes */ - case 17: case 18: case 19: case 20: case 21: case 22: case 23: case 24: - return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + - rgcBits[(*bm >> 16) & ((1 << (i-16))-1)]); - case 25: case 26: case 27: case 28: case 29: case 30: case 31: case 32: - return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + - rgcBits[(*bm >> 16) & 255] + - rgcBits[(*bm >> 24) & ((1 << (i-24))-1)]); -#if LOG_WORD_SIZE >= 6 /* 8 bytes! 
*/ - case 33: case 34: case 35: case 36: case 37: case 38: case 39: case 40: - return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + - rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + - rgcBits[(*bm >> 32) & ((1 << (i-32))-1)]); - case 41: case 42: case 43: case 44: case 45: case 46: case 47: case 48: - return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + - rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + - rgcBits[(*bm >> 32) & 255] + - rgcBits[(*bm >> 40) & ((1 << (i-40))-1)]); - case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: - return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + - rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + - rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] + - rgcBits[(*bm >> 48) & ((1 << (i-48))-1)]); - case 57: case 58: case 59: case 60: case 61: case 62: case 63: case 64: - return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + - rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + - rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] + - rgcBits[(*bm >> 48) & 255] + - rgcBits[(*bm >> 56) & ((1 << (i-56))-1)]); -#endif /* 8 bytes */ -#endif /* 4 bytes */ -#endif /* 2 bytes */ -#endif /* 1 byte */ - } - assert("" == "word size is too big in EntriesUpto()"); - return -1; -} -#define SPARSE_POS_TO_OFFSET(bm, i) ( EntriesUpto(&((bm)[0]), i) ) -#define SPARSE_BUCKET(bin, location) \ - ( (bin)[(location) >> LOG_LOW_BIN_SIZE].binSparse + \ - SPARSE_POS_TO_OFFSET((bin)[(location)>>LOG_LOW_BIN_SIZE].bmOccupied, \ - MOD2(location, LOG_LOW_BIN_SIZE)) ) - - -/*************************************************************************\ -| SparseAllocate() | -| SparseFree() | -| Allocates, sets-to-empty, and frees a sparse array. All you need | -| to tell me is how many buckets you want. I return the number of | -| buckets I actually allocated, setting the array as a parameter. 
| -| Note that you have to set auxilliary parameters, like cOccupied. | -\*************************************************************************/ - -static ulong SparseAllocate(SparseBin **pbinSparse, ulong cBuckets) -{ - int cGroups = SPARSE_GROUPS(cBuckets); - - *pbinSparse = (SparseBin *) HTscalloc(sizeof(**pbinSparse) * cGroups); - return cGroups << LOG_LOW_BIN_SIZE; -} - -static SparseBin *SparseFree(SparseBin *binSparse, ulong cBuckets) -{ - ulong iGroup, cGroups = SPARSE_GROUPS(cBuckets); - - for ( iGroup = 0; iGroup < cGroups; iGroup++ ) - HTfree(binSparse[iGroup].binSparse, (sizeof(*binSparse[iGroup].binSparse) - * binSparse[iGroup].cOccupied)); - HTfree(binSparse, sizeof(*binSparse) * cGroups); - return NULL; -} - -/*************************************************************************\ -| SparseIsEmpty() | -| SparseFind() | -| You give me a location (ie a number between 1 and t), and I | -| return the bucket at that location, or NULL if the bucket is | -| empty. It's OK to call Find() on an empty table. | -\*************************************************************************/ - -static int SparseIsEmpty(SparseBin *binSparse, ulong location) -{ - return !TEST_BITMAP(binSparse[location>>LOG_LOW_BIN_SIZE].bmOccupied, - MOD2(location, LOG_LOW_BIN_SIZE)); -} - -static SparseBucket *SparseFind(SparseBin *binSparse, ulong location) -{ - if ( SparseIsEmpty(binSparse, location) ) - return NULL; - return SPARSE_BUCKET(binSparse, location); -} - -/*************************************************************************\ -| SparseInsert() | -| You give me a location, and contents to put there, and I insert | -| into that location and RETURN a pointer to the location. If | -| bucket was already occupied, I write over the contents only if | -| *pfOverwrite is 1. We set *pfOverwrite to 1 if there was someone | -| there (whether or not we overwrote) and 0 else. 
| -\*************************************************************************/ - -static SparseBucket *SparseInsert(SparseBin *binSparse, SparseBucket *bckInsert, - ulong location, int *pfOverwrite) -{ - SparseBucket *bckPlace; - HTOffset offset; - - bckPlace = SparseFind(binSparse, location); - if ( bckPlace ) /* means we replace old contents */ - { - if ( *pfOverwrite ) - *bckPlace = *bckInsert; - *pfOverwrite = 1; - return bckPlace; - } - - binSparse += (location >> LOG_LOW_BIN_SIZE); - offset = SPARSE_POS_TO_OFFSET(binSparse->bmOccupied, - MOD2(location, LOG_LOW_BIN_SIZE)); - binSparse->binSparse = (SparseBucket *) - HTsrealloc(binSparse->binSparse, - sizeof(*binSparse->binSparse) * ++binSparse->cOccupied, - sizeof(*binSparse->binSparse)); - memmove(binSparse->binSparse + offset+1, - binSparse->binSparse + offset, - (binSparse->cOccupied-1 - offset) * sizeof(*binSparse->binSparse)); - binSparse->binSparse[offset] = *bckInsert; - SET_BITMAP(binSparse->bmOccupied, MOD2(location, LOG_LOW_BIN_SIZE)); - *pfOverwrite = 0; - return binSparse->binSparse + offset; -} - -/*************************************************************************\ -| SparseFirstBucket() | -| SparseNextBucket() | -| SparseCurrentBit() | -| Iterate through the occupied buckets of a dense hashtable. You | -| must, of course, have allocated space yourself for the iterator. | -\*************************************************************************/ - -static SparseBucket *SparseNextBucket(SparseIterator *iter) -{ - if ( iter->posOffset != -1 && /* not called from FirstBucket()? 
*/ - (++iter->posOffset < iter->binSparse[iter->posGroup].cOccupied) ) - return iter->binSparse[iter->posGroup].binSparse + iter->posOffset; - - iter->posOffset = 0; /* start the next group */ - for ( iter->posGroup++; iter->posGroup < SPARSE_GROUPS(iter->cBuckets); - iter->posGroup++ ) - if ( iter->binSparse[iter->posGroup].cOccupied > 0 ) - return iter->binSparse[iter->posGroup].binSparse; /* + 0 */ - return NULL; /* all remaining groups were empty */ -} - -static SparseBucket *SparseFirstBucket(SparseIterator *iter, - SparseBin *binSparse, ulong cBuckets) -{ - iter->binSparse = binSparse; /* set it up for NextBucket() */ - iter->cBuckets = cBuckets; - iter->posOffset = -1; /* when we advance, we're at 0 */ - iter->posGroup = -1; - return SparseNextBucket(iter); -} - -/*************************************************************************\ -| SparseWrite() | -| SparseRead() | -| These are routines for storing a sparse hashtable onto disk. We | -| store the number of buckets and a bitmap indicating which buckets | -| are allocated (occupied). The actual contents of the buckets | -| must be stored separately. 
| -\*************************************************************************/ - -static void SparseWrite(FILE *fp, SparseBin *binSparse, ulong cBuckets) -{ - ulong i, j; - - WRITE_UL(fp, cBuckets); - for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ ) - for ( j = 0; j < (1<rgBuckets, cBuckets); -} - -static ulong DenseAllocate(DenseBin **pbin, ulong cBuckets) -{ - *pbin = (DenseBin *) HTsmalloc(sizeof(*pbin)); - (*pbin)->rgBuckets = (DenseBucket *) HTsmalloc(sizeof(*(*pbin)->rgBuckets) - * cBuckets); - DenseClear(*pbin, cBuckets); - return cBuckets; -} - -static DenseBin *DenseFree(DenseBin *bin, ulong cBuckets) -{ - HTfree(bin->rgBuckets, sizeof(*bin->rgBuckets) * cBuckets); - HTfree(bin, sizeof(*bin)); - return NULL; -} - -static int DenseIsEmpty(DenseBin *bin, ulong location) -{ - return DENSE_IS_EMPTY(bin->rgBuckets, location); -} - -static DenseBucket *DenseFind(DenseBin *bin, ulong location) -{ - if ( DenseIsEmpty(bin, location) ) - return NULL; - return bin->rgBuckets + location; -} - -static DenseBucket *DenseInsert(DenseBin *bin, DenseBucket *bckInsert, - ulong location, int *pfOverwrite) -{ - DenseBucket *bckPlace; - - bckPlace = DenseFind(bin, location); - if ( bckPlace ) /* means something is already there */ - { - if ( *pfOverwrite ) - *bckPlace = *bckInsert; - *pfOverwrite = 1; /* set to 1 to indicate someone was there */ - return bckPlace; - } - else - { - bin->rgBuckets[location] = *bckInsert; - *pfOverwrite = 0; - return bin->rgBuckets + location; - } -} - -static DenseBucket *DenseNextBucket(DenseIterator *iter) -{ - for ( iter->pos++; iter->pos < iter->cBuckets; iter->pos++ ) - if ( !DenseIsEmpty(iter->bin, iter->pos) ) - return iter->bin->rgBuckets + iter->pos; - return NULL; /* all remaining groups were empty */ -} - -static DenseBucket *DenseFirstBucket(DenseIterator *iter, - DenseBin *bin, ulong cBuckets) -{ - iter->bin = bin; /* set it up for NextBucket() */ - iter->cBuckets = cBuckets; - iter->pos = -1; /* thus the next bucket will be 0 */ - 
return DenseNextBucket(iter); -} - -static void DenseWrite(FILE *fp, DenseBin *bin, ulong cBuckets) -{ - ulong pos = 0, bit, bm; - - WRITE_UL(fp, cBuckets); - while ( pos < cBuckets ) - { - bm = 0; - for ( bit = 0; bit < 8*sizeof(ulong); bit++ ) - { - if ( !DenseIsEmpty(bin, pos) ) - SET_BITMAP(&bm, bit); /* in fks-hash.h */ - if ( ++pos == cBuckets ) - break; - } - WRITE_UL(fp, bm); - } -} - -static ulong DenseRead(FILE *fp, DenseBin **pbin) -{ - ulong pos = 0, bit, bm, cBuckets; - - READ_UL(fp, cBuckets); - cBuckets = DenseAllocate(pbin, cBuckets); - while ( pos < cBuckets ) - { - READ_UL(fp, bm); - for ( bit = 0; bit < 8*sizeof(ulong); bit++ ) - { - if ( TEST_BITMAP(&bm, bit) ) /* in fks-hash.h */ - DENSE_SET_OCCUPIED((*pbin)->rgBuckets, pos); - else - DENSE_SET_EMPTY((*pbin)->rgBuckets, pos); - if ( ++pos == cBuckets ) - break; - } - } - return cBuckets; -} - -static ulong DenseMemory(ulong cBuckets, ulong cOccupied) -{ - return cBuckets * sizeof(DenseBucket); -} - - -/* ======================================================================== */ -/* HASHING ROUTINES */ -/* ---------------------- */ - -/* Implements a simple quadratic hashing scheme. We have a single hash - * table of size t and a single hash function h(x). When inserting an - * item, first we try h(x) % t. If it's occupied, we try h(x) + - * i*(i-1)/2 % t for increasing values of i until we hit a not-occupied - * space. To make this dynamic, we double the size of the hash table as - * soon as more than half the cells are occupied. When deleting, we can - * choose to shrink the hashtable when less than a quarter of the - * cells are occupied, or we can choose never to shrink the hashtable. - * For lookup, we check h(x) + i*(i-1)/2 % t (starting with i=0) until - * we get a match or we hit an empty space. Note that as a result, - * we can't make a cell empty on deletion, or lookups may end prematurely. - * Instead we mark the cell as "deleted." 
We thus steal the value - * DELETED as a possible "data" value. As long as data are pointers, - * that's ok. - * The hash increment we use, i(i-1)/2, is not the standard quadratic - * hash increment, which is i^2. i(i-1)/2 covers the entire bucket space - * when the hashtable size is a power of two, as it is for us. In fact, - * the first n probes cover n distinct buckets; then it repeats. This - * guarantees insertion will always succeed. - * If you linear hashing, set JUMP in chash.h. You can also change - * various other parameters there. - */ - -/*************************************************************************\ -| Hash() | -| The hash function I use is due to Bob Jenkins (see | -| http://burtleburtle.net/bob/hash/evahash.html | -| According to http://burtleburtle.net/bob/c/lookup2.c, | -| his implementation is public domain.) | -| It takes 36 instructions, in 18 cycles if you're lucky. | -| hashing depends on the fact the hashtable size is always a | -| power of 2. cBuckets is probably ht->cBuckets. 
| -\*************************************************************************/ - -#if LOG_WORD_SIZE == 5 /* 32 bit words */ - -#define mix(a,b,c) \ -{ \ - a -= b; a -= c; a ^= (c>>13); \ - b -= c; b -= a; b ^= (a<<8); \ - c -= a; c -= b; c ^= (b>>13); \ - a -= b; a -= c; a ^= (c>>12); \ - b -= c; b -= a; b ^= (a<<16); \ - c -= a; c -= b; c ^= (b>>5); \ - a -= b; a -= c; a ^= (c>>3); \ - b -= c; b -= a; b ^= (a<<10); \ - c -= a; c -= b; c ^= (b>>15); \ -} -#ifdef WORD_HASH /* play with this on little-endian machines */ -#define WORD_AT(ptr) ( *(ulong *)(ptr) ) -#else -#define WORD_AT(ptr) ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \ - ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) ) -#endif - -#elif LOG_WORD_SIZE == 6 /* 64 bit words */ - -#define mix(a,b,c) \ -{ \ - a -= b; a -= c; a ^= (c>>43); \ - b -= c; b -= a; b ^= (a<<9); \ - c -= a; c -= b; c ^= (b>>8); \ - a -= b; a -= c; a ^= (c>>38); \ - b -= c; b -= a; b ^= (a<<23); \ - c -= a; c -= b; c ^= (b>>5); \ - a -= b; a -= c; a ^= (c>>35); \ - b -= c; b -= a; b ^= (a<<49); \ - c -= a; c -= b; c ^= (b>>11); \ - a -= b; a -= c; a ^= (c>>12); \ - b -= c; b -= a; b ^= (a<<18); \ - c -= a; c -= b; c ^= (b>>22); \ -} -#ifdef WORD_HASH /* alpha is little-endian, btw */ -#define WORD_AT(ptr) ( *(ulong *)(ptr) ) -#else -#define WORD_AT(ptr) ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \ - ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) + \ - ((ulong)(ptr)[4]<<32) + ((ulong)(ptr)[5]<<40) + \ - ((ulong)(ptr)[6]<<48) + ((ulong)(ptr)[7]<<56) ) -#endif - -#else /* neither 32 or 64 bit words */ -#error This hash function can only hash 32 or 64 bit words. Sorry. -#endif - -static ulong Hash(HashTable *ht, char *key, ulong cBuckets) -{ - ulong a, b, c, cchKey, cchKeyOrig; - - cchKeyOrig = ht->cchKey == NULL_TERMINATED ? 
strlen(key) : ht->cchKey; - a = b = c = 0x9e3779b9; /* the golden ratio; an arbitrary value */ - - for ( cchKey = cchKeyOrig; cchKey >= 3 * sizeof(ulong); - cchKey -= 3 * sizeof(ulong), key += 3 * sizeof(ulong) ) - { - a += WORD_AT(key); - b += WORD_AT(key + sizeof(ulong)); - c += WORD_AT(key + sizeof(ulong)*2); - mix(a,b,c); - } - - c += cchKeyOrig; - switch ( cchKey ) { /* deal with rest. Cases fall through */ -#if LOG_WORD_SIZE == 5 - case 11: c += (ulong)key[10]<<24; - case 10: c += (ulong)key[9]<<16; - case 9 : c += (ulong)key[8]<<8; - /* the first byte of c is reserved for the length */ - case 8 : b += WORD_AT(key+4); a+= WORD_AT(key); break; - case 7 : b += (ulong)key[6]<<16; - case 6 : b += (ulong)key[5]<<8; - case 5 : b += key[4]; - case 4 : a += WORD_AT(key); break; - case 3 : a += (ulong)key[2]<<16; - case 2 : a += (ulong)key[1]<<8; - case 1 : a += key[0]; - /* case 0 : nothing left to add */ -#elif LOG_WORD_SIZE == 6 - case 23: c += (ulong)key[22]<<56; - case 22: c += (ulong)key[21]<<48; - case 21: c += (ulong)key[20]<<40; - case 20: c += (ulong)key[19]<<32; - case 19: c += (ulong)key[18]<<24; - case 18: c += (ulong)key[17]<<16; - case 17: c += (ulong)key[16]<<8; - /* the first byte of c is reserved for the length */ - case 16: b += WORD_AT(key+8); a+= WORD_AT(key); break; - case 15: b += (ulong)key[14]<<48; - case 14: b += (ulong)key[13]<<40; - case 13: b += (ulong)key[12]<<32; - case 12: b += (ulong)key[11]<<24; - case 11: b += (ulong)key[10]<<16; - case 10: b += (ulong)key[ 9]<<8; - case 9: b += (ulong)key[ 8]; - case 8: a += WORD_AT(key); break; - case 7: a += (ulong)key[ 6]<<48; - case 6: a += (ulong)key[ 5]<<40; - case 5: a += (ulong)key[ 4]<<32; - case 4: a += (ulong)key[ 3]<<24; - case 3: a += (ulong)key[ 2]<<16; - case 2: a += (ulong)key[ 1]<<8; - case 1: a += (ulong)key[ 0]; - /* case 0: nothing left to add */ -#endif - } - mix(a,b,c); - return c & (cBuckets-1); -} - - 
-/*************************************************************************\ -| Rehash() | -| You give me a hashtable, a new size, and a bucket to follow, and | -| I resize the hashtable's bin to be the new size, rehashing | -| everything in it. I keep particular track of the bucket you pass | -| in, and RETURN a pointer to where the item in the bucket got to. | -| (If you pass in NULL, I return an arbitrary pointer.) | -\*************************************************************************/ - -static HTItem *Rehash(HashTable *ht, ulong cNewBuckets, HTItem *bckWatch) -{ - Table *tableNew; - ulong iBucketFirst; - HTItem *bck, *bckNew = NULL; - ulong offset; /* the i in h(x) + i*(i-1)/2 */ - int fOverwrite = 0; /* not an issue: there can be no collisions */ - - assert( ht->table ); - cNewBuckets = Table(Allocate)(&tableNew, cNewBuckets); - /* Since we RETURN the new position of bckWatch, we want * - * to make sure it doesn't get moved due to some table * - * rehashing that comes after it's inserted. Thus, we * - * have to put it in last. This makes the loop weird. 
*/ - for ( bck = HashFirstBucket(ht); ; bck = HashNextBucket(ht) ) - { - if ( bck == NULL ) /* we're done iterating, so look at bckWatch */ - { - bck = bckWatch; - if ( bck == NULL ) /* I guess bckWatch wasn't specified */ - break; - } - else if ( bck == bckWatch ) - continue; /* ignore if we see it during the iteration */ - - offset = 0; /* a new i for a new bucket */ - for ( iBucketFirst = Hash(ht, KEY_PTR(ht, bck->key), cNewBuckets); - !Table(IsEmpty)(tableNew, iBucketFirst); - iBucketFirst = (iBucketFirst + JUMP(KEY_PTR(ht,bck->key), offset)) - & (cNewBuckets-1) ) - ; - bckNew = Table(Insert)(tableNew, bck, iBucketFirst, &fOverwrite); - if ( bck == bckWatch ) /* we're done with the last thing to do */ - break; - } - Table(Free)(ht->table, ht->cBuckets); - ht->table = tableNew; - ht->cBuckets = cNewBuckets; - ht->cDeletedItems = 0; - return bckNew; /* new position of bckWatch, which was inserted last */ -} - -/*************************************************************************\ -| Find() | -| Does the quadratic searching stuff. RETURNS NULL if we don't | -| find an object with the given key, and a pointer to the Item | -| holding the key, if we do. Also sets posLastFind. If piEmpty is | -| non-NULL, we set it to the first open bucket we pass; helpful for | -| doing a later insert if the search fails, for instance. 
| -\*************************************************************************/ - -static HTItem *Find(HashTable *ht, ulong key, ulong *piEmpty) -{ - ulong iBucketFirst; - HTItem *item; - ulong offset = 0; /* the i in h(x) + i*(i-1)/2 */ - int fFoundEmpty = 0; /* set when we pass over an empty bucket */ - - ht->posLastFind = NULL; /* set up for failure: a new find starts */ - if ( ht->table == NULL ) /* empty hash table: find is bound to fail */ - return NULL; - - iBucketFirst = Hash(ht, KEY_PTR(ht, key), ht->cBuckets); - while ( 1 ) /* now try all i > 0 */ - { - item = Table(Find)(ht->table, iBucketFirst); - if ( item == NULL ) /* it's not in the table */ - { - if ( piEmpty && !fFoundEmpty ) *piEmpty = iBucketFirst; - return NULL; - } - else - { - if ( IS_BCK_DELETED(item) ) /* always 0 ifdef INSERT_ONLY */ - { - if ( piEmpty && !fFoundEmpty ) - { - *piEmpty = iBucketFirst; - fFoundEmpty = 1; - } - } else - if ( !KEY_CMP(ht, key, item->key) ) /* must be occupied */ - { - ht->posLastFind = item; - return item; /* we found it! */ - } - } - iBucketFirst = ((iBucketFirst + JUMP(KEY_PTR(ht, key), offset)) - & (ht->cBuckets-1)); - } -} - -/*************************************************************************\ -| Insert() | -| If an item with the key already exists in the hashtable, RETURNS | -| a pointer to the item (replacing its data if fOverwrite is 1). | -| If not, we find the first place-to-insert (which Find() is nice | -| enough to set for us) and insert the item there, RETURNing a | -| pointer to the item. We might grow the hashtable if it's getting | -| full. Note we include buckets holding DELETED when determining | -| fullness, because they slow down searching. 
| -\*************************************************************************/ - -static ulong NextPow2(ulong x) /* returns next power of 2 > x, or 2^31 */ -{ - if ( ((x << 1) >> 1) != x ) /* next power of 2 overflows */ - x >>= 1; /* so we return highest power of 2 we can */ - while ( (x & (x-1)) != 0 ) /* blacks out all but the top bit */ - x &= (x-1); - return x << 1; /* makes it the *next* power of 2 */ -} - -static HTItem *Insert(HashTable *ht, ulong key, ulong data, int fOverwrite) -{ - HTItem *item, bckInsert; - ulong iEmpty; /* first empty bucket key probes */ - - if ( ht->table == NULL ) /* empty hash table: find is bound to fail */ - return NULL; - item = Find(ht, key, &iEmpty); - ht->posLastFind = NULL; /* last operation is insert, not find */ - if ( item ) - { - if ( fOverwrite ) - item->data = data; /* key already matches */ - return item; - } - - COPY_KEY(ht, bckInsert.key, key); /* make our own copy of the key */ - bckInsert.data = data; /* oh, and the data too */ - item = Table(Insert)(ht->table, &bckInsert, iEmpty, &fOverwrite); - if ( fOverwrite ) /* we overwrote a deleted bucket */ - ht->cDeletedItems--; - ht->cItems++; /* insert couldn't have overwritten */ - if ( ht->cDeltaGoalSize > 0 ) /* closer to our goal size */ - ht->cDeltaGoalSize--; - if ( ht->cItems + ht->cDeletedItems >= ht->cBuckets * OCCUPANCY_PCT - || ht->cDeltaGoalSize < 0 ) /* we must've overestimated # of deletes */ - item = Rehash(ht, - NextPow2((ulong)(((ht->cDeltaGoalSize > 0 ? - ht->cDeltaGoalSize : 0) - + ht->cItems) / OCCUPANCY_PCT)), - item); - return item; -} - -/*************************************************************************\ -| Delete() | -| Removes the item from the hashtable, and if fShrink is 1, will | -| shrink the hashtable if it's too small (ie even after halving, | -| the ht would be less than half full, though in order to avoid | -| oscillating table size, we insist that after halving the ht would | -| be less than 40% full). 
RETURNS 1 if the item was found, 0 else. | -| If fLastFindSet is true, then this function is basically | -| DeleteLastFind. | -\*************************************************************************/ - -static int Delete(HashTable *ht, ulong key, int fShrink, int fLastFindSet) -{ - if ( !fLastFindSet && !Find(ht, key, NULL) ) - return 0; - SET_BCK_DELETED(ht, ht->posLastFind); /* find set this, how nice */ - ht->cItems--; - ht->cDeletedItems++; - if ( ht->cDeltaGoalSize < 0 ) /* heading towards our goal of deletion */ - ht->cDeltaGoalSize++; - - if ( fShrink && ht->cItems < ht->cBuckets * OCCUPANCY_PCT*0.4 - && ht->cDeltaGoalSize >= 0 /* wait until we're done deleting */ - && (ht->cBuckets >> 1) >= MIN_HASH_SIZE ) /* shrink */ - Rehash(ht, - NextPow2((ulong)((ht->cItems+ht->cDeltaGoalSize)/OCCUPANCY_PCT)), - NULL); - ht->posLastFind = NULL; /* last operation is delete, not find */ - return 1; -} - - -/* ======================================================================== */ -/* USER-VISIBLE API */ -/* ---------------------- */ - -/*************************************************************************\ -| AllocateHashTable() | -| ClearHashTable() | -| FreeHashTable() | -| Allocate() allocates a hash table and sets up size parameters. | -| Free() frees it. Clear() deletes all the items from the hash | -| table, but frees not. | -| cchKey is < 0 if the keys you send me are meant to be pointers | -| to \0-terminated strings. Then -cchKey is the maximum key size. | -| If cchKey < one word (ulong), the keys you send me are the keys | -| themselves; else the keys you send me are pointers to the data. | -| If fSaveKeys is 1, we copy any keys given to us to insert. We | -| also free these keys when freeing the hash table. If it's 0, the | -| user is responsible for key space management. | -| AllocateHashTable() RETURNS a hash table; the others TAKE one. 
| -\*************************************************************************/ - -HashTable *AllocateHashTable(int cchKey, int fSaveKeys) -{ - HashTable *ht; - - ht = (HashTable *) HTsmalloc(sizeof(*ht)); /* set everything to 0 */ - ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE); - ht->cchKey = cchKey <= 0 ? NULL_TERMINATED : cchKey; - ht->cItems = 0; - ht->cDeletedItems = 0; - ht->fSaveKeys = fSaveKeys; - ht->cDeltaGoalSize = 0; - ht->iter = HTsmalloc( sizeof(TableIterator) ); - - ht->fpData = NULL; /* set by HashLoad, maybe */ - ht->bckData.data = (ulong) NULL; /* this must be done */ - HTSetupKeyTrunc(); /* in util.c */ - return ht; -} - -void ClearHashTable(HashTable *ht) -{ - HTItem *bck; - - if ( STORES_PTR(ht) && ht->fSaveKeys ) /* need to free keys */ - for ( bck = HashFirstBucket(ht); bck; bck = HashNextBucket(ht) ) - { - FREE_KEY(ht, bck->key); - if ( ht->fSaveKeys == 2 ) /* this means key stored in one block */ - break; /* ...so only free once */ - } - Table(Free)(ht->table, ht->cBuckets); - ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE); - - ht->cItems = 0; - ht->cDeletedItems = 0; - ht->cDeltaGoalSize = 0; - ht->posLastFind = NULL; - ht->fpData = NULL; /* no longer HashLoading */ - if ( ht->bckData.data ) free( (char *)(ht)->bckData.data); - ht->bckData.data = (ulong) NULL; -} - -void FreeHashTable(HashTable *ht) -{ - ClearHashTable(ht); - if ( ht->iter ) HTfree(ht->iter, sizeof(TableIterator)); - if ( ht->table ) Table(Free)(ht->table, ht->cBuckets); - free(ht); -} - -/*************************************************************************\ -| HashFind() | -| HashFindLast() | -| HashFind(): looks in h(x) + i(i-1)/2 % t as i goes up from 0 | -| until we either find the key or hit an empty bucket. RETURNS a | -| pointer to the item in the hit bucket, if we find it, else | -| RETURNS NULL. | -| HashFindLast() returns the item returned by the last | -| HashFind(), which may be NULL if the last HashFind() failed. 
| -| LOAD_AND_RETURN reads the data from off disk, if necessary. | -\*************************************************************************/ - -HTItem *HashFind(HashTable *ht, ulong key) -{ - LOAD_AND_RETURN(ht, Find(ht, KEY_TRUNC(ht, key), NULL)); -} - -HTItem *HashFindLast(HashTable *ht) -{ - LOAD_AND_RETURN(ht, ht->posLastFind); -} - -/*************************************************************************\ -| HashFindOrInsert() | -| HashFindOrInsertItem() | -| HashInsert() | -| HashInsertItem() | -| HashDelete() | -| HashDeleteLast() | -| Pretty obvious what these guys do. Some take buckets (items), | -| some take keys and data separately. All things RETURN the bucket | -| (a pointer into the hashtable) if appropriate. | -\*************************************************************************/ - -HTItem *HashFindOrInsert(HashTable *ht, ulong key, ulong dataInsert) -{ - /* This is equivalent to Insert without samekey-overwrite */ - return Insert(ht, KEY_TRUNC(ht, key), dataInsert, 0); -} - -HTItem *HashFindOrInsertItem(HashTable *ht, HTItem *pItem) -{ - return HashFindOrInsert(ht, pItem->key, pItem->data); -} - -HTItem *HashInsert(HashTable *ht, ulong key, ulong data) -{ - return Insert(ht, KEY_TRUNC(ht, key), data, SAMEKEY_OVERWRITE); -} - -HTItem *HashInsertItem(HashTable *ht, HTItem *pItem) -{ - return HashInsert(ht, pItem->key, pItem->data); -} - -int HashDelete(HashTable *ht, ulong key) -{ - return Delete(ht, KEY_TRUNC(ht, key), !FAST_DELETE, 0); -} - -int HashDeleteLast(HashTable *ht) -{ - if ( !ht->posLastFind ) /* last find failed */ - return 0; - return Delete(ht, 0, !FAST_DELETE, 1); /* no need to specify a key */ -} - -/*************************************************************************\ -| HashFirstBucket() | -| HashNextBucket() | -| Iterates through the items in the hashtable by iterating through | -| the table. 
Since we know about deleted buckets and loading data | -| off disk, and the table doesn't, our job is to take care of these | -| things. RETURNS a bucket, or NULL after the last bucket. | -\*************************************************************************/ - -HTItem *HashFirstBucket(HashTable *ht) -{ - HTItem *retval; - - for ( retval = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets); - retval; retval = Table(NextBucket)(ht->iter) ) - if ( !IS_BCK_DELETED(retval) ) - LOAD_AND_RETURN(ht, retval); - return NULL; -} - -HTItem *HashNextBucket(HashTable *ht) -{ - HTItem *retval; - - while ( (retval=Table(NextBucket)(ht->iter)) ) - if ( !IS_BCK_DELETED(retval) ) - LOAD_AND_RETURN(ht, retval); - return NULL; -} - -/*************************************************************************\ -| HashSetDeltaGoalSize() | -| If we're going to insert 100 items, set the delta goal size to | -| 100 and we take that into account when inserting. Likewise, if | -| we're going to delete 10 items, set it to -100 and we won't | -| rehash until all 100 have been done. It's ok to be wrong, but | -| it's efficient to be right. Returns the delta value. | -\*************************************************************************/ - -int HashSetDeltaGoalSize(HashTable *ht, int delta) -{ - ht->cDeltaGoalSize = delta; -#if FAST_DELETE == 1 || defined INSERT_ONLY - if ( ht->cDeltaGoalSize < 0 ) /* for fast delete, we never */ - ht->cDeltaGoalSize = 0; /* ...rehash after deletion */ -#endif - return ht->cDeltaGoalSize; -} - - -/*************************************************************************\ -| HashSave() | -| HashLoad() | -| HashLoadKeys() | -| Routines for saving and loading the hashtable from disk. We can | -| then use the hashtable in two ways: loading it back into memory | -| (HashLoad()) or loading only the keys into memory, in which case | -| the data for a given key is loaded off disk when the key is | -| retrieved. 
The data is freed when something new is retrieved in | -| its place, so this is not a "lazy-load" scheme. | -| The key is saved automatically and restored upon load, but the | -| user needs to specify a routine for reading and writing the data. | -| fSaveKeys is of course set to 1 when you read in a hashtable. | -| HashLoad RETURNS a newly allocated hashtable. | -| DATA_WRITE() takes an fp and a char * (representing the data | -| field), and must perform two separate tasks. If fp is NULL, | -| return the number of bytes written. If not, writes the data to | -| disk at the place the fp points to. | -| DATA_READ() takes an fp and the number of bytes in the data | -| field, and returns a char * which points to wherever you've | -| written the data. Thus, you must allocate memory for the data. | -| Both dataRead and dataWrite may be NULL if you just wish to | -| store the data field directly, as an integer. | -\*************************************************************************/ - -void HashSave(FILE *fp, HashTable *ht, int (*dataWrite)(FILE *, char *)) -{ - long cchData, posStart; - HTItem *bck; - - /* File format: magic number (4 bytes) - : cchKey (one word) - : cItems (one word) - : cDeletedItems (one word) - : table info (buckets and a bitmap) - : cchAllKeys (one word) - Then the keys, in a block. If cchKey is NULL_TERMINATED, the keys - are null-terminated too, otherwise this takes up cchKey*cItems bytes. - Note that keys are not written for DELETED buckets. - Then the data: - : EITHER DELETED (one word) to indicate it's a deleted bucket, - : OR number of bytes for this (non-empty) bucket's data - (one word). This is not stored if dataWrite == NULL - since the size is known to be sizeof(ul). Plus: - : the data for this bucket (variable length) - All words are in network byte order. 
*/ - - fprintf(fp, "%s", MAGIC_KEY); - WRITE_UL(fp, ht->cchKey); /* WRITE_UL, READ_UL, etc in fks-hash.h */ - WRITE_UL(fp, ht->cItems); - WRITE_UL(fp, ht->cDeletedItems); - Table(Write)(fp, ht->table, ht->cBuckets); /* writes cBuckets too */ - - WRITE_UL(fp, 0); /* to be replaced with sizeof(key block) */ - posStart = ftell(fp); - for ( bck = HashFirstBucket(ht); bck; bck = HashNextBucket(ht) ) - fwrite(KEY_PTR(ht, bck->key), 1, - (ht->cchKey == NULL_TERMINATED ? - strlen(KEY_PTR(ht, bck->key))+1 : ht->cchKey), fp); - cchData = ftell(fp) - posStart; - fseek(fp, posStart - sizeof(unsigned long), SEEK_SET); - WRITE_UL(fp, cchData); - fseek(fp, 0, SEEK_END); /* done with our sojourn at the header */ - - /* Unlike HashFirstBucket, TableFirstBucket iters through deleted bcks */ - for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets); - bck; bck = Table(NextBucket)(ht->iter) ) - if ( dataWrite == NULL || IS_BCK_DELETED(bck) ) - WRITE_UL(fp, bck->data); - else /* write cchData followed by the data */ - { - WRITE_UL(fp, (*dataWrite)(NULL, (char *)bck->data)); - (*dataWrite)(fp, (char *)bck->data); - } -} - -static HashTable *HashDoLoad(FILE *fp, char * (*dataRead)(FILE *, int), - HashTable *ht) -{ - ulong cchKey; - char szMagicKey[4], *rgchKeys; - HTItem *bck; - - fread(szMagicKey, 1, 4, fp); - if ( strncmp(szMagicKey, MAGIC_KEY, 4) ) - { - fprintf(stderr, "ERROR: not a hash table (magic key is %4.4s, not %s)\n", - szMagicKey, MAGIC_KEY); - exit(3); - } - Table(Free)(ht->table, ht->cBuckets); /* allocated in AllocateHashTable */ - - READ_UL(fp, ht->cchKey); - READ_UL(fp, ht->cItems); - READ_UL(fp, ht->cDeletedItems); - ht->cBuckets = Table(Read)(fp, &ht->table); /* next is the table info */ - - READ_UL(fp, cchKey); - rgchKeys = (char *) HTsmalloc( cchKey ); /* stores all the keys */ - fread(rgchKeys, 1, cchKey, fp); - /* We use the table iterator so we don't try to LOAD_AND_RETURN */ - for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets); - bck; 
bck = Table(NextBucket)(ht->iter) ) - { - READ_UL(fp, bck->data); /* all we need if dataRead is NULL */ - if ( IS_BCK_DELETED(bck) ) /* always 0 if defined(INSERT_ONLY) */ - continue; /* this is why we read the data first */ - if ( dataRead != NULL ) /* if it's null, we're done */ - if ( !ht->fpData ) /* load data into memory */ - bck->data = (ulong)dataRead(fp, bck->data); - else /* store location of data on disk */ - { - fseek(fp, bck->data, SEEK_CUR); /* bck->data held size of data */ - bck->data = ftell(fp) - bck->data - sizeof(unsigned long); - } - - if ( ht->cchKey == NULL_TERMINATED ) /* now read the key */ - { - bck->key = (ulong) rgchKeys; - rgchKeys = strchr(rgchKeys, '\0') + 1; /* read past the string */ - } - else - { - if ( STORES_PTR(ht) ) /* small keys stored directly */ - bck->key = (ulong) rgchKeys; - else - memcpy(&bck->key, rgchKeys, ht->cchKey); - rgchKeys += ht->cchKey; - } - } - if ( !STORES_PTR(ht) ) /* keys are stored directly */ - HTfree(rgchKeys - cchKey, cchKey); /* we've advanced rgchK to end */ - return ht; -} - -HashTable *HashLoad(FILE *fp, char * (*dataRead)(FILE *, int)) -{ - HashTable *ht; - ht = AllocateHashTable(0, 2); /* cchKey set later, fSaveKey should be 2! */ - return HashDoLoad(fp, dataRead, ht); -} - -HashTable *HashLoadKeys(FILE *fp, char * (*dataRead)(FILE *, int)) -{ - HashTable *ht; - - if ( dataRead == NULL ) - return HashLoad(fp, NULL); /* no reason not to load the data here */ - ht = AllocateHashTable(0, 2); /* cchKey set later, fSaveKey should be 2! */ - ht->fpData = fp; /* tells HashDoLoad() to only load keys */ - ht->dataRead = dataRead; - return HashDoLoad(fp, dataRead, ht); -} - -/*************************************************************************\ -| PrintHashTable() | -| A debugging tool. Prints the entire contents of the hash table, | -| like so: : key of the contents. Returns number of bytes | -| allocated. If time is not -1, we print it as the time required | -| for the hash. 
If iForm is 0, we just print the stats. If it's | -| 1, we print the keys and data too, but the keys are printed as | -| ulongs. If it's 2, we print the keys correctly (as long numbers | -| or as strings). | -\*************************************************************************/ - -ulong PrintHashTable(HashTable *ht, double time, int iForm) -{ - ulong cbData = 0, cbBin = 0, cItems = 0, cOccupied = 0; - HTItem *item; - - printf("HASH TABLE.\n"); - if ( time > -1.0 ) - { - printf("----------\n"); - printf("Time: %27.2f\n", time); - } - - for ( item = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets); - item; item = Table(NextBucket)(ht->iter) ) - { - cOccupied++; /* this includes deleted buckets */ - if ( IS_BCK_DELETED(item) ) /* we don't need you for anything else */ - continue; - cItems++; /* this is for a sanity check */ - if ( STORES_PTR(ht) ) - cbData += ht->cchKey == NULL_TERMINATED ? - WORD_ROUND(strlen((char *)item->key)+1) : ht->cchKey; - else - cbBin -= sizeof(item->key), cbData += sizeof(item->key); - cbBin -= sizeof(item->data), cbData += sizeof(item->data); - if ( iForm != 0 ) /* we want the actual contents */ - { - if ( iForm == 2 && ht->cchKey == NULL_TERMINATED ) - printf("%s/%lu\n", (char *)item->key, item->data); - else if ( iForm == 2 && STORES_PTR(ht) ) - printf("%.*s/%lu\n", - (int)ht->cchKey, (char *)item->key, item->data); - else /* either key actually is a ulong, or iForm == 1 */ - printf("%lu/%lu\n", item->key, item->data); - } - } - assert( cItems == ht->cItems ); /* sanity check */ - cbBin = Table(Memory)(ht->cBuckets, cOccupied); - - printf("----------\n"); - printf("%lu buckets (%lu bytes). %lu empty. %lu hold deleted items.\n" - "%lu items (%lu bytes).\n" - "%lu bytes total. 
%lu bytes (%2.1f%%) of this is ht overhead.\n", - ht->cBuckets, cbBin, ht->cBuckets - cOccupied, cOccupied - ht->cItems, - ht->cItems, cbData, - cbData + cbBin, cbBin, cbBin*100.0/(cbBin+cbData)); - - return cbData + cbBin; -} diff --git a/src/sparsehash-1.6/experimental/libchash.h b/src/sparsehash-1.6/experimental/libchash.h deleted file mode 100644 index 0c0f70a..0000000 --- a/src/sparsehash-1.6/experimental/libchash.h +++ /dev/null @@ -1,252 +0,0 @@ -/* Copyright (c) 1998 - 2005, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Craig Silverstein - * - * This library is intended to be used for in-memory hash tables, - * though it provides rudimentary permanent-storage capabilities. - * It attempts to be fast, portable, and small. The best algorithm - * to fulfill these goals is an internal probing hashing algorithm, - * as in Knuth, _Art of Computer Programming_, vol III. Unlike - * chained (open) hashing, it doesn't require a pointer for every - * item, yet it is still constant time lookup in practice. - * - * Also to save space, we let the contents (both data and key) that - * you insert be a union: if the key/data is small, we store it - * directly in the hashtable, otherwise we store a pointer to it. - * To keep you from having to figure out which, use KEY_PTR and - * PTR_KEY to convert between the arguments to these functions and - * a pointer to the real data. For instance: - * char key[] = "ab", *key2; - * HTItem *bck; HashTable *ht; - * HashInsert(ht, PTR_KEY(ht, key), 0); - * bck = HashFind(ht, PTR_KEY(ht, "ab")); - * key2 = KEY_PTR(ht, bck->key); - * - * There are a rich set of operations supported: - * AllocateHashTable() -- Allocates a hashtable structure and - * returns it. - * cchKey: if it's a positive number, then each key is a - * fixed-length record of that length. If it's 0, - * the key is assumed to be a \0-terminated string. - * fSaveKey: normally, you are responsible for allocating - * space for the key. 
If this is 1, we make a - * copy of the key for you. - * ClearHashTable() -- Removes everything from a hashtable - * FreeHashTable() -- Frees memory used by a hashtable - * - * HashFind() -- takes a key (use PTR_KEY) and returns the - * HTItem containing that key, or NULL if the - * key is not in the hashtable. - * HashFindLast() -- returns the item found by last HashFind() - * HashFindOrInsert() -- inserts the key/data pair if the key - * is not already in the hashtable, or - * returns the appropraite HTItem if it is. - * HashFindOrInsertItem() -- takes key/data as an HTItem. - * HashInsert() -- adds a key/data pair to the hashtable. What - * it does if the key is already in the table - * depends on the value of SAMEKEY_OVERWRITE. - * HashInsertItem() -- takes key/data as an HTItem. - * HashDelete() -- removes a key/data pair from the hashtable, - * if it's there. RETURNS 1 if it was there, - * 0 else. - * If you use sparse tables and never delete, the full data - * space is available. Otherwise we steal -2 (maybe -3), - * so you can't have data fields with those values. - * HashDeleteLast() -- deletes the item returned by the last Find(). - * - * HashFirstBucket() -- used to iterate over the buckets in a - * hashtable. DON'T INSERT OR DELETE WHILE - * ITERATING! You can't nest iterations. - * HashNextBucket() -- RETURNS NULL at the end of iterating. - * - * HashSetDeltaGoalSize() -- if you're going to insert 1000 items - * at once, call this fn with arg 1000. - * It grows the table more intelligently. - * - * HashSave() -- saves the hashtable to a file. It saves keys ok, - * but it doesn't know how to interpret the data field, - * so if the data field is a pointer to some complex - * structure, you must send a function that takes a - * file pointer and a pointer to the structure, and - * write whatever you want to write. It should return - * the number of bytes written. 
If the file is NULL, - * it should just return the number of bytes it would - * write, without writing anything. - * If your data field is just an integer, not a - * pointer, just send NULL for the function. - * HashLoad() -- loads a hashtable. It needs a function that takes - * a file and the size of the structure, and expects - * you to read in the structure and return a pointer - * to it. You must do memory allocation, etc. If - * the data is just a number, send NULL. - * HashLoadKeys() -- unlike HashLoad(), doesn't load the data off disk - * until needed. This saves memory, but if you look - * up the same key a lot, it does a disk access each - * time. - * You can't do Insert() or Delete() on hashtables that were loaded - * from disk. - */ - -#include /* includes definition of "ulong", we hope */ -#define ulong u_long - -#define MAGIC_KEY "CHsh" /* when we save the file */ - -#ifndef LOG_WORD_SIZE /* 5 for 32 bit words, 6 for 64 */ -#if defined (__LP64__) || defined (_LP64) -#define LOG_WORD_SIZE 6 /* log_2(sizeof(ulong)) [in bits] */ -#else -#define LOG_WORD_SIZE 5 /* log_2(sizeof(ulong)) [in bits] */ -#endif -#endif - - /* The following gives a speed/time tradeoff: how many buckets are * - * in each bin. 0 gives 32 buckets/bin, which is a good number. */ -#ifndef LOG_BM_WORDS -#define LOG_BM_WORDS 0 /* each group has 2^L_B_W * 32 buckets */ -#endif - - /* The following are all parameters that affect performance. */ -#ifndef JUMP -#define JUMP(key, offset) ( ++(offset) ) /* ( 1 ) for linear hashing */ -#endif -#ifndef Table -#define Table(x) Sparse##x /* Dense##x for dense tables */ -#endif -#ifndef FAST_DELETE -#define FAST_DELETE 0 /* if it's 1, we never shrink the ht */ -#endif -#ifndef SAMEKEY_OVERWRITE -#define SAMEKEY_OVERWRITE 1 /* overwrite item with our key on insert? 
*/ -#endif -#ifndef OCCUPANCY_PCT -#define OCCUPANCY_PCT 0.5 /* large PCT means smaller and slower */ -#endif -#ifndef MIN_HASH_SIZE -#define MIN_HASH_SIZE 512 /* ht size when first created */ -#endif - /* When deleting a bucket, we can't just empty it (future hashes * - * may fail); instead we set the data field to DELETED. Thus you * - * should set DELETED to a data value you never use. Better yet, * - * if you don't need to delete, define INSERT_ONLY. */ -#ifndef INSERT_ONLY -#define DELETED -2UL -#define IS_BCK_DELETED(bck) ( (bck) && (bck)->data == DELETED ) -#define SET_BCK_DELETED(ht, bck) do { (bck)->data = DELETED; \ - FREE_KEY(ht, (bck)->key); } while ( 0 ) -#else -#define IS_BCK_DELETED(bck) 0 -#define SET_BCK_DELETED(ht, bck) \ - do { fprintf(stderr, "Deletion not supported for insert-only hashtable\n");\ - exit(2); } while ( 0 ) -#endif - - /* We need the following only for dense buckets (Dense##x above). * - * If you need to, set this to a value you'll never use for data. */ -#define EMPTY -3UL /* steal more of the bck->data space */ - - - /* This is what an item is. Either can be cast to a pointer. */ -typedef struct { - ulong data; /* 4 bytes for data: either a pointer or an integer */ - ulong key; /* 4 bytes for the key: either a pointer or an int */ -} HTItem; - -struct Table(Bin); /* defined in chash.c, I hope */ -struct Table(Iterator); -typedef struct Table(Bin) Table; /* Expands to SparseBin, etc */ -typedef struct Table(Iterator) TableIterator; - - /* for STORES_PTR to work ok, cchKey MUST BE DEFINED 1st, cItems 2nd! 
*/ -typedef struct HashTable { - ulong cchKey; /* the length of the key, or if it's \0 terminated */ - ulong cItems; /* number of items currently in the hashtable */ - ulong cDeletedItems; /* # of buckets holding DELETE in the hashtable */ - ulong cBuckets; /* size of the table */ - Table *table; /* The actual contents of the hashtable */ - int fSaveKeys; /* 1 if we copy keys locally; 2 if keys in one block */ - int cDeltaGoalSize; /* # of coming inserts (or deletes, if <0) we expect */ - HTItem *posLastFind; /* position of last Find() command */ - TableIterator *iter; /* used in First/NextBucket */ - - FILE *fpData; /* if non-NULL, what item->data points into */ - char * (*dataRead)(FILE *, int); /* how to load data from disk */ - HTItem bckData; /* holds data after being loaded from disk */ -} HashTable; - - /* Small keys are stored and passed directly, but large keys are - * stored and passed as pointers. To make it easier to remember - * what to pass, we provide two functions: - * PTR_KEY: give it a pointer to your data, and it returns - * something appropriate to send to Hash() functions or - * be stored in a data field. - * KEY_PTR: give it something returned by a Hash() routine, and - * it returns a (char *) pointer to the actual data. - */ -#define HashKeySize(ht) ( ((ulong *)(ht))[0] ) /* this is how we inline */ -#define HashSize(ht) ( ((ulong *)(ht))[1] ) /* ...a la C++ :-) */ - -#define STORES_PTR(ht) ( HashKeySize(ht) == 0 || \ - HashKeySize(ht) > sizeof(ulong) ) -#define KEY_PTR(ht, key) ( STORES_PTR(ht) ? (char *)(key) : (char *)&(key) ) -#ifdef DONT_HAVE_TO_WORRY_ABOUT_BUS_ERRORS -#define PTR_KEY(ht, ptr) ( STORES_PTR(ht) ? (ulong)(ptr) : *(ulong *)(ptr) ) -#else -#define PTR_KEY(ht, ptr) ( STORES_PTR(ht) ? 
(ulong)(ptr) : HTcopy((char *)ptr)) -#endif - - - /* Function prototypes */ -unsigned long HTcopy(char *pul); /* for PTR_KEY, not for users */ - -struct HashTable *AllocateHashTable(int cchKey, int fSaveKeys); -void ClearHashTable(struct HashTable *ht); -void FreeHashTable(struct HashTable *ht); - -HTItem *HashFind(struct HashTable *ht, ulong key); -HTItem *HashFindLast(struct HashTable *ht); -HTItem *HashFindOrInsert(struct HashTable *ht, ulong key, ulong dataInsert); -HTItem *HashFindOrInsertItem(struct HashTable *ht, HTItem *pItem); - -HTItem *HashInsert(struct HashTable *ht, ulong key, ulong data); -HTItem *HashInsertItem(struct HashTable *ht, HTItem *pItem); - -int HashDelete(struct HashTable *ht, ulong key); -int HashDeleteLast(struct HashTable *ht); - -HTItem *HashFirstBucket(struct HashTable *ht); -HTItem *HashNextBucket(struct HashTable *ht); - -int HashSetDeltaGoalSize(struct HashTable *ht, int delta); - -void HashSave(FILE *fp, struct HashTable *ht, int (*write)(FILE *, char *)); -struct HashTable *HashLoad(FILE *fp, char * (*read)(FILE *, int)); -struct HashTable *HashLoadKeys(FILE *fp, char * (*read)(FILE *, int)); diff --git a/src/sparsehash-1.6/google-sparsehash.sln b/src/sparsehash-1.6/google-sparsehash.sln deleted file mode 100755 index 6148fb5..0000000 --- a/src/sparsehash-1.6/google-sparsehash.sln +++ /dev/null @@ -1,47 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 8.00 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "type_traits_unittest", "vsprojects\type_traits_unittest\type_traits_unittest.vcproj", "{008CCFED-7D7B-46F8-8E13-03837A2258B3}" - ProjectSection(ProjectDependencies) = postProject - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sparsetable_unittest", "vsprojects\sparsetable_unittest\sparsetable_unittest.vcproj", "{E420867B-8BFA-4739-99EC-E008AB762FF9}" - ProjectSection(ProjectDependencies) = postProject - EndProjectSection -EndProject 
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hashtable_unittest", "vsprojects\hashtable_unittest\hashtable_unittest.vcproj", "{FCDB3718-F01C-4DE4-B9F5-E10F2C5C0535}" - ProjectSection(ProjectDependencies) = postProject - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "time_hash_map", "vsprojects\time_hash_map\time_hash_map.vcproj", "{A74E5DB8-5295-487A-AB1D-23859F536F45}" - ProjectSection(ProjectDependencies) = postProject - EndProjectSection -EndProject -Global - GlobalSection(SolutionConfiguration) = preSolution - Debug = Debug - Release = Release - EndGlobalSection - GlobalSection(ProjectDependencies) = postSolution - EndGlobalSection - GlobalSection(ProjectConfiguration) = postSolution - {008CCFED-7D7B-46F8-8E13-03837A2258B3}.Debug.ActiveCfg = Debug|Win32 - {008CCFED-7D7B-46F8-8E13-03837A2258B3}.Debug.Build.0 = Debug|Win32 - {008CCFED-7D7B-46F8-8E13-03837A2258B3}.Release.ActiveCfg = Release|Win32 - {008CCFED-7D7B-46F8-8E13-03837A2258B3}.Release.Build.0 = Release|Win32 - {E420867B-8BFA-4739-99EC-E008AB762FF9}.Debug.ActiveCfg = Debug|Win32 - {E420867B-8BFA-4739-99EC-E008AB762FF9}.Debug.Build.0 = Debug|Win32 - {E420867B-8BFA-4739-99EC-E008AB762FF9}.Release.ActiveCfg = Release|Win32 - {E420867B-8BFA-4739-99EC-E008AB762FF9}.Release.Build.0 = Release|Win32 - {FCDB3718-F01C-4DE4-B9F5-E10F2C5C0535}.Debug.ActiveCfg = Debug|Win32 - {FCDB3718-F01C-4DE4-B9F5-E10F2C5C0535}.Debug.Build.0 = Debug|Win32 - {FCDB3718-F01C-4DE4-B9F5-E10F2C5C0535}.Release.ActiveCfg = Release|Win32 - {FCDB3718-F01C-4DE4-B9F5-E10F2C5C0535}.Release.Build.0 = Release|Win32 - {A74E5DB8-5295-487A-AB1D-23859F536F45}.Debug.ActiveCfg = Debug|Win32 - {A74E5DB8-5295-487A-AB1D-23859F536F45}.Debug.Build.0 = Debug|Win32 - {A74E5DB8-5295-487A-AB1D-23859F536F45}.Release.ActiveCfg = Release|Win32 - {A74E5DB8-5295-487A-AB1D-23859F536F45}.Release.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - EndGlobalSection - 
GlobalSection(ExtensibilityAddIns) = postSolution - EndGlobalSection -EndGlobal diff --git a/src/sparsehash-1.6/hashtable_unittest b/src/sparsehash-1.6/hashtable_unittest deleted file mode 100755 index d4ce1fc..0000000 Binary files a/src/sparsehash-1.6/hashtable_unittest and /dev/null differ diff --git a/src/sparsehash-1.6/m4/acx_pthread.m4 b/src/sparsehash-1.6/m4/acx_pthread.m4 deleted file mode 100644 index 2cf20de..0000000 --- a/src/sparsehash-1.6/m4/acx_pthread.m4 +++ /dev/null @@ -1,363 +0,0 @@ -# This was retrieved from -# http://svn.0pointer.de/viewvc/trunk/common/acx_pthread.m4?revision=1277&root=avahi -# See also (perhaps for new versions?) -# http://svn.0pointer.de/viewvc/trunk/common/acx_pthread.m4?root=avahi -# -# We've rewritten the inconsistency check code (from avahi), to work -# more broadly. In particular, it no longer assumes ld accepts -zdefs. -# This caused a restructing of the code, but the functionality has only -# changed a little. - -dnl @synopsis ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) -dnl -dnl @summary figure out how to build C programs using POSIX threads -dnl -dnl This macro figures out how to build C programs using POSIX threads. -dnl It sets the PTHREAD_LIBS output variable to the threads library and -dnl linker flags, and the PTHREAD_CFLAGS output variable to any special -dnl C compiler flags that are needed. (The user can also force certain -dnl compiler flags/libs to be tested by setting these environment -dnl variables.) -dnl -dnl Also sets PTHREAD_CC to any special C compiler that is needed for -dnl multi-threaded programs (defaults to the value of CC otherwise). -dnl (This is necessary on AIX to use the special cc_r compiler alias.) -dnl -dnl NOTE: You are assumed to not only compile your program with these -dnl flags, but also link it with them as well. e.g. you should link -dnl with $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... 
$PTHREAD_LIBS -dnl $LIBS -dnl -dnl If you are only building threads programs, you may wish to use -dnl these variables in your default LIBS, CFLAGS, and CC: -dnl -dnl LIBS="$PTHREAD_LIBS $LIBS" -dnl CFLAGS="$CFLAGS $PTHREAD_CFLAGS" -dnl CC="$PTHREAD_CC" -dnl -dnl In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute -dnl constant has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to -dnl that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). -dnl -dnl ACTION-IF-FOUND is a list of shell commands to run if a threads -dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands to -dnl run it if it is not found. If ACTION-IF-FOUND is not specified, the -dnl default action will define HAVE_PTHREAD. -dnl -dnl Please let the authors know if this macro fails on any platform, or -dnl if you have any other suggestions or comments. This macro was based -dnl on work by SGJ on autoconf scripts for FFTW (www.fftw.org) (with -dnl help from M. Frigo), as well as ac_pthread and hb_pthread macros -dnl posted by Alejandro Forero Cuervo to the autoconf macro repository. -dnl We are also grateful for the helpful feedback of numerous users. -dnl -dnl @category InstalledPackages -dnl @author Steven G. Johnson -dnl @version 2006-05-29 -dnl @license GPLWithACException -dnl -dnl Checks for GCC shared/pthread inconsistency based on work by -dnl Marcin Owsiany - - -AC_DEFUN([ACX_PTHREAD], [ -AC_REQUIRE([AC_CANONICAL_HOST]) -AC_LANG_SAVE -AC_LANG_C -acx_pthread_ok=no - -# We used to check for pthread.h first, but this fails if pthread.h -# requires special compiler flags (e.g. on True64 or Sequent). -# It gets checked for in the link test anyway. 
- -# First of all, check if the user has set any of the PTHREAD_LIBS, -# etcetera environment variables, and if threads linking works using -# them: -if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) - AC_TRY_LINK_FUNC(pthread_join, acx_pthread_ok=yes) - AC_MSG_RESULT($acx_pthread_ok) - if test x"$acx_pthread_ok" = xno; then - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" - fi - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" -fi - -# We must check for the threads library under a number of different -# names; the ordering is very important because some systems -# (e.g. DEC) have both -lpthread and -lpthreads, where one of the -# libraries is broken (non-POSIX). - -# Create a list of thread flags to try. Items starting with a "-" are -# C compiler flags, and other items are library names, except for "none" -# which indicates that we try without any flags at all, and "pthread-config" -# which is a program returning the flags for the Pth emulation library. - -acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" - -# The ordering *is* (sometimes) important. 
Some notes on the -# individual items follow: - -# pthreads: AIX (must check this before -lpthread) -# none: in case threads are in libc; should be tried before -Kthread and -# other compiler flags to prevent continual compiler warnings -# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) -# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) -# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) -# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) -# -pthreads: Solaris/gcc -# -mthreads: Mingw32/gcc, Lynx/gcc -# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it -# doesn't hurt to check since this sometimes defines pthreads too; -# also defines -D_REENTRANT) -# ... -mt is also the pthreads flag for HP/aCC -# pthread: Linux, etcetera -# --thread-safe: KAI C++ -# pthread-config: use pthread-config program (for GNU Pth library) - -case "${host_cpu}-${host_os}" in - *solaris*) - - # On Solaris (at least, for some versions), libc contains stubbed - # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (We need to link with -pthreads/-mt/ - # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather - # a function called by this macro, so we could check for that, but - # who knows whether they'll stub that too in a future libc.) 
So, - # we'll just look for -pthreads and -lpthread first: - - acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags" - ;; -esac - -if test x"$acx_pthread_ok" = xno; then -for flag in $acx_pthread_flags; do - - case $flag in - none) - AC_MSG_CHECKING([whether pthreads work without any flags]) - ;; - - -*) - AC_MSG_CHECKING([whether pthreads work with $flag]) - PTHREAD_CFLAGS="$flag" - ;; - - pthread-config) - AC_CHECK_PROG(acx_pthread_config, pthread-config, yes, no) - if test x"$acx_pthread_config" = xno; then continue; fi - PTHREAD_CFLAGS="`pthread-config --cflags`" - PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" - ;; - - *) - AC_MSG_CHECKING([for the pthreads library -l$flag]) - PTHREAD_LIBS="-l$flag" - ;; - esac - - save_LIBS="$LIBS" - save_CFLAGS="$CFLAGS" - LIBS="$PTHREAD_LIBS $LIBS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Check for various functions. We must include pthread.h, - # since some functions may be macros. (On the Sequent, we - # need a special flag -Kthread to make this header compile.) - # We check for pthread_join because it is in -lpthread on IRIX - # while pthread_create is in libc. We check for pthread_attr_init - # due to DEC craziness with -lpthreads. We check for - # pthread_cleanup_push because it is one of the few pthread - # functions on Solaris that doesn't have a non-functional libc stub. - # We try pthread_create on general principles. 
- AC_TRY_LINK([#include ], - [pthread_t th; pthread_join(th, 0); - pthread_attr_init(0); pthread_cleanup_push(0, 0); - pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], - [acx_pthread_ok=yes]) - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - AC_MSG_RESULT($acx_pthread_ok) - if test "x$acx_pthread_ok" = xyes; then - break; - fi - - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" -done -fi - -# Various other checks: -if test "x$acx_pthread_ok" = xyes; then - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. - AC_MSG_CHECKING([for joinable pthread attribute]) - attr_name=unknown - for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do - AC_TRY_LINK([#include ], [int attr=$attr; return attr;], - [attr_name=$attr; break]) - done - AC_MSG_RESULT($attr_name) - if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then - AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name, - [Define to necessary symbol if this constant - uses a non-standard name on your system.]) - fi - - AC_MSG_CHECKING([if more special flags are required for pthreads]) - flag=no - case "${host_cpu}-${host_os}" in - *-aix* | *-freebsd* | *-darwin*) flag="-D_THREAD_SAFE";; - *solaris* | *-osf* | *-hpux*) flag="-D_REENTRANT";; - esac - AC_MSG_RESULT(${flag}) - if test "x$flag" != xno; then - PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" - fi - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - # More AIX lossage: must compile with xlc_r or cc_r - if test x"$GCC" != xyes; then - AC_CHECK_PROGS(PTHREAD_CC, xlc_r cc_r, ${CC}) - else - PTHREAD_CC=$CC - fi - - # The next part tries to detect GCC inconsistency with -shared on some - # architectures and systems. The problem is that in certain - # configurations, when -shared is specified, GCC "forgets" to - # internally use various flags which are still necessary. 
- - # - # Prepare the flags - # - save_CFLAGS="$CFLAGS" - save_LIBS="$LIBS" - save_CC="$CC" - - # Try with the flags determined by the earlier checks. - # - # -Wl,-z,defs forces link-time symbol resolution, so that the - # linking checks with -shared actually have any value - # - # FIXME: -fPIC is required for -shared on many architectures, - # so we specify it here, but the right way would probably be to - # properly detect whether it is actually required. - CFLAGS="-shared -fPIC -Wl,-z,defs $CFLAGS $PTHREAD_CFLAGS" - LIBS="$PTHREAD_LIBS $LIBS" - CC="$PTHREAD_CC" - - # In order not to create several levels of indentation, we test - # the value of "$done" until we find the cure or run out of ideas. - done="no" - - # First, make sure the CFLAGS we added are actually accepted by our - # compiler. If not (and OS X's ld, for instance, does not accept -z), - # then we can't do this test. - if test x"$done" = xno; then - AC_MSG_CHECKING([whether to check for GCC pthread/shared inconsistencies]) - AC_TRY_LINK(,, , [done=yes]) - - if test "x$done" = xyes ; then - AC_MSG_RESULT([no]) - else - AC_MSG_RESULT([yes]) - fi - fi - - if test x"$done" = xno; then - AC_MSG_CHECKING([whether -pthread is sufficient with -shared]) - AC_TRY_LINK([#include ], - [pthread_t th; pthread_join(th, 0); - pthread_attr_init(0); pthread_cleanup_push(0, 0); - pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], - [done=yes]) - - if test "x$done" = xyes; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi - fi - - # - # Linux gcc on some architectures such as mips/mipsel forgets - # about -lpthread - # - if test x"$done" = xno; then - AC_MSG_CHECKING([whether -lpthread fixes that]) - LIBS="-lpthread $PTHREAD_LIBS $save_LIBS" - AC_TRY_LINK([#include ], - [pthread_t th; pthread_join(th, 0); - pthread_attr_init(0); pthread_cleanup_push(0, 0); - pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], - [done=yes]) - - if test "x$done" = xyes; then - AC_MSG_RESULT([yes]) - PTHREAD_LIBS="-lpthread 
$PTHREAD_LIBS" - else - AC_MSG_RESULT([no]) - fi - fi - # - # FreeBSD 4.10 gcc forgets to use -lc_r instead of -lc - # - if test x"$done" = xno; then - AC_MSG_CHECKING([whether -lc_r fixes that]) - LIBS="-lc_r $PTHREAD_LIBS $save_LIBS" - AC_TRY_LINK([#include ], - [pthread_t th; pthread_join(th, 0); - pthread_attr_init(0); pthread_cleanup_push(0, 0); - pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], - [done=yes]) - - if test "x$done" = xyes; then - AC_MSG_RESULT([yes]) - PTHREAD_LIBS="-lc_r $PTHREAD_LIBS" - else - AC_MSG_RESULT([no]) - fi - fi - if test x"$done" = xno; then - # OK, we have run out of ideas - AC_MSG_WARN([Impossible to determine how to use pthreads with shared libraries]) - - # so it's not safe to assume that we may use pthreads - acx_pthread_ok=no - fi - - CFLAGS="$save_CFLAGS" - LIBS="$save_LIBS" - CC="$save_CC" -else - PTHREAD_CC="$CC" -fi - -AC_SUBST(PTHREAD_LIBS) -AC_SUBST(PTHREAD_CFLAGS) -AC_SUBST(PTHREAD_CC) - -# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: -if test x"$acx_pthread_ok" = xyes; then - ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1]) - : -else - acx_pthread_ok=no - $2 -fi -AC_LANG_RESTORE -])dnl ACX_PTHREAD diff --git a/src/sparsehash-1.6/m4/google_namespace.m4 b/src/sparsehash-1.6/m4/google_namespace.m4 deleted file mode 100644 index 7f244cc..0000000 --- a/src/sparsehash-1.6/m4/google_namespace.m4 +++ /dev/null @@ -1,42 +0,0 @@ -# Allow users to override the namespace we define our application's classes in -# Arg $1 is the default namespace to use if --enable-namespace isn't present. - -# In general, $1 should be 'google', so we put all our exported symbols in a -# unique namespace that is not likely to conflict with anyone else. However, -# when it makes sense -- for instance, when publishing stl-like code -- you -# may want to go with a different default, like 'std'. 
- -# We guarantee the invariant that GOOGLE_NAMESPACE starts with ::, -# unless it's the empty string. Thus, it's always safe to do -# GOOGLE_NAMESPACE::foo and be sure you're getting the foo that's -# actually in the google namespace, and not some other namespace that -# the namespace rules might kick in. - -AC_DEFUN([AC_DEFINE_GOOGLE_NAMESPACE], - [google_namespace_default=[$1] - AC_ARG_ENABLE(namespace, [ --enable-namespace=FOO to define these Google - classes in the FOO namespace. --disable-namespace - to define them in the global namespace. Default - is to define them in namespace $1.], - [case "$enableval" in - yes) google_namespace="$google_namespace_default" ;; - no) google_namespace="" ;; - *) google_namespace="$enableval" ;; - esac], - [google_namespace="$google_namespace_default"]) - if test -n "$google_namespace"; then - ac_google_namespace="::$google_namespace" - ac_google_start_namespace="namespace $google_namespace {" - ac_google_end_namespace="}" - else - ac_google_namespace="" - ac_google_start_namespace="" - ac_google_end_namespace="" - fi - AC_DEFINE_UNQUOTED(GOOGLE_NAMESPACE, $ac_google_namespace, - Namespace for Google classes) - AC_DEFINE_UNQUOTED(_START_GOOGLE_NAMESPACE_, $ac_google_start_namespace, - Puts following code inside the Google namespace) - AC_DEFINE_UNQUOTED(_END_GOOGLE_NAMESPACE_, $ac_google_end_namespace, - Stops putting the code inside the Google namespace) -]) diff --git a/src/sparsehash-1.6/m4/namespaces.m4 b/src/sparsehash-1.6/m4/namespaces.m4 deleted file mode 100644 index d78dbe4..0000000 --- a/src/sparsehash-1.6/m4/namespaces.m4 +++ /dev/null @@ -1,15 +0,0 @@ -# Checks whether the compiler implements namespaces -AC_DEFUN([AC_CXX_NAMESPACES], - [AC_CACHE_CHECK(whether the compiler implements namespaces, - ac_cv_cxx_namespaces, - [AC_LANG_SAVE - AC_LANG_CPLUSPLUS - AC_TRY_COMPILE([namespace Outer { - namespace Inner { int i = 0; }}], - [using namespace Outer::Inner; return i;], - ac_cv_cxx_namespaces=yes, - 
ac_cv_cxx_namespaces=no) - AC_LANG_RESTORE]) - if test "$ac_cv_cxx_namespaces" = yes; then - AC_DEFINE(HAVE_NAMESPACES, 1, [define if the compiler implements namespaces]) - fi]) diff --git a/src/sparsehash-1.6/m4/stl_hash.m4 b/src/sparsehash-1.6/m4/stl_hash.m4 deleted file mode 100644 index a31baab..0000000 --- a/src/sparsehash-1.6/m4/stl_hash.m4 +++ /dev/null @@ -1,70 +0,0 @@ -# We check two things: where the include file is for -# unordered_map/hash_map (we prefer the first form), and what -# namespace unordered/hash_map lives in within that include file. We -# include AC_TRY_COMPILE for all the combinations we've seen in the -# wild. We define HASH_MAP_H to the location of the header file, and -# HASH_NAMESPACE to the namespace the class (unordered_map or -# hash_map) is in. We define HAVE_UNORDERED_MAP if the class we found -# is named unordered_map, or leave it undefined if not. - -# This also checks if unordered map exists. -AC_DEFUN([AC_CXX_STL_HASH], - [AC_REQUIRE([AC_CXX_NAMESPACES]) - AC_MSG_CHECKING(the location of hash_map) - AC_LANG_SAVE - AC_LANG_CPLUSPLUS - ac_cv_cxx_hash_map="" - # First try unordered_map, but not on gcc's before 4.2 -- I've - # seen unexplainable unordered_map bugs with -O2 on older gcc's. - AC_TRY_COMPILE([#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)) - # error GCC too old for unordered_map - #endif - ], - [/* no program body necessary */], - [stl_hash_old_gcc=no], - [stl_hash_old_gcc=yes]) - for location in unordered_map tr1/unordered_map; do - for namespace in std std::tr1; do - if test -z "$ac_cv_cxx_hash_map" -a "$stl_hash_old_gcc" != yes; then - # Some older gcc's have a buggy tr1, so test a bit of code. 
- AC_TRY_COMPILE([#include <$location>], - [const ${namespace}::unordered_map t; - return t.find(5) == t.end();], - [ac_cv_cxx_hash_map="<$location>"; - ac_cv_cxx_hash_namespace="$namespace"; - ac_cv_cxx_have_unordered_map="yes";]) - fi - done - done - # Now try hash_map - for location in ext/hash_map hash_map; do - for namespace in __gnu_cxx "" std stdext; do - if test -z "$ac_cv_cxx_hash_map"; then - AC_TRY_COMPILE([#include <$location>], - [${namespace}::hash_map t], - [ac_cv_cxx_hash_map="<$location>"; - ac_cv_cxx_hash_namespace="$namespace"; - ac_cv_cxx_have_unordered_map="no";]) - fi - done - done - ac_cv_cxx_hash_set=`echo "$ac_cv_cxx_hash_map" | sed s/map/set/`; - if test -n "$ac_cv_cxx_hash_map"; then - AC_DEFINE(HAVE_HASH_MAP, 1, [define if the compiler has hash_map]) - AC_DEFINE(HAVE_HASH_SET, 1, [define if the compiler has hash_set]) - AC_DEFINE_UNQUOTED(HASH_MAP_H,$ac_cv_cxx_hash_map, - [the location of or ]) - AC_DEFINE_UNQUOTED(HASH_SET_H,$ac_cv_cxx_hash_set, - [the location of or ]) - AC_DEFINE_UNQUOTED(HASH_NAMESPACE,$ac_cv_cxx_hash_namespace, - [the namespace of hash_map/hash_set]) - if test "$ac_cv_cxx_have_unordered_map" = yes; then - AC_DEFINE(HAVE_UNORDERED_MAP,1, - [define if the compiler supports unordered_{map,set}]) - fi - AC_MSG_RESULT([$ac_cv_cxx_hash_map]) - else - AC_MSG_RESULT() - AC_MSG_WARN([could not find an STL hash_map]) - fi -]) diff --git a/src/sparsehash-1.6/m4/stl_hash_fun.m4 b/src/sparsehash-1.6/m4/stl_hash_fun.m4 deleted file mode 100644 index 962b088..0000000 --- a/src/sparsehash-1.6/m4/stl_hash_fun.m4 +++ /dev/null @@ -1,36 +0,0 @@ -# We just try to figure out where hash<> is defined. It's in some file -# that ends in hash_fun.h... -# -# Ideally we'd use AC_CACHE_CHECK, but that only lets us store one value -# at a time, and we need to store two (filename and namespace). -# prints messages itself, so we have to do the message-printing ourselves -# via AC_MSG_CHECKING + AC_MSG_RESULT. (TODO(csilvers): can we cache?) 
-# -# tr1/functional_hash.h: new gcc's with tr1 support -# stl_hash_fun.h: old gcc's (gc2.95?) -# ext/hash_fun.h: newer gcc's (gcc4) -# stl/_hash_fun.h: STLport - -AC_DEFUN([AC_CXX_STL_HASH_FUN], - [AC_REQUIRE([AC_CXX_STL_HASH]) - AC_MSG_CHECKING(how to include hash_fun directly) - AC_LANG_SAVE - AC_LANG_CPLUSPLUS - ac_cv_cxx_stl_hash_fun="" - for location in functional tr1/functional \ - ext/hash_fun.h ext/stl_hash_fun.h \ - hash_fun.h stl_hash_fun.h \ - stl/_hash_fun.h; do - if test -z "$ac_cv_cxx_stl_hash_fun"; then - AC_TRY_COMPILE([#include <$location>], - [int x = ${ac_cv_cxx_hash_namespace}::hash()(5)], - [ac_cv_cxx_stl_hash_fun="<$location>";]) - fi - done - AC_LANG_RESTORE - AC_DEFINE_UNQUOTED(HASH_FUN_H,$ac_cv_cxx_stl_hash_fun, - [the location of the header defining hash functions]) - AC_DEFINE_UNQUOTED(HASH_NAMESPACE,$ac_cv_cxx_hash_namespace, - [the namespace of the hash<> function]) - AC_MSG_RESULT([$ac_cv_cxx_stl_hash_fun]) -]) diff --git a/src/sparsehash-1.6/m4/stl_namespace.m4 b/src/sparsehash-1.6/m4/stl_namespace.m4 deleted file mode 100644 index 989ad80..0000000 --- a/src/sparsehash-1.6/m4/stl_namespace.m4 +++ /dev/null @@ -1,25 +0,0 @@ -# We check what namespace stl code like vector expects to be executed in - -AC_DEFUN([AC_CXX_STL_NAMESPACE], - [AC_CACHE_CHECK( - what namespace STL code is in, - ac_cv_cxx_stl_namespace, - [AC_REQUIRE([AC_CXX_NAMESPACES]) - AC_LANG_SAVE - AC_LANG_CPLUSPLUS - AC_TRY_COMPILE([#include ], - [vector t; return 0;], - ac_cv_cxx_stl_namespace=none) - AC_TRY_COMPILE([#include ], - [std::vector t; return 0;], - ac_cv_cxx_stl_namespace=std) - AC_LANG_RESTORE]) - if test "$ac_cv_cxx_stl_namespace" = none; then - AC_DEFINE(STL_NAMESPACE,, - [the namespace where STL code like vector<> is defined]) - fi - if test "$ac_cv_cxx_stl_namespace" = std; then - AC_DEFINE(STL_NAMESPACE,std, - [the namespace where STL code like vector<> is defined]) - fi -]) diff --git a/src/sparsehash-1.6/missing b/src/sparsehash-1.6/missing deleted 
file mode 100755 index 894e786..0000000 --- a/src/sparsehash-1.6/missing +++ /dev/null @@ -1,360 +0,0 @@ -#! /bin/sh -# Common stub for a few missing GNU programs while installing. - -scriptversion=2005-06-08.21 - -# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005 -# Free Software Foundation, Inc. -# Originally by Fran,cois Pinard , 1996. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -if test $# -eq 0; then - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 -fi - -run=: - -# In the cases where this matters, `missing' is being run in the -# srcdir already. -if test -f configure.ac; then - configure_ac=configure.ac -else - configure_ac=configure.in -fi - -msg="missing on your system" - -case "$1" in ---run) - # Try to run requested program, and just exit if it succeeds. - run= - shift - "$@" && exit 0 - # Exit code 63 means version mismatch. This often happens - # when the user try to use an ancient version of a tool on - # a file that requires a minimum version. 
In this case we - # we should proceed has if the program had been absent, or - # if --run hadn't been passed. - if test $? = 63; then - run=: - msg="probably too old" - fi - ;; - - -h|--h|--he|--hel|--help) - echo "\ -$0 [OPTION]... PROGRAM [ARGUMENT]... - -Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an -error status if there is no known handling for PROGRAM. - -Options: - -h, --help display this help and exit - -v, --version output version information and exit - --run try to run the given command, and emulate it if it fails - -Supported PROGRAM values: - aclocal touch file \`aclocal.m4' - autoconf touch file \`configure' - autoheader touch file \`config.h.in' - automake touch all \`Makefile.in' files - bison create \`y.tab.[ch]', if possible, from existing .[ch] - flex create \`lex.yy.c', if possible, from existing .c - help2man touch the output file - lex create \`lex.yy.c', if possible, from existing .c - makeinfo touch the output file - tar try tar, gnutar, gtar, then tar without non-portable flags - yacc create \`y.tab.[ch]', if possible, from existing .[ch] - -Send bug reports to ." - exit $? - ;; - - -v|--v|--ve|--ver|--vers|--versi|--versio|--version) - echo "missing $scriptversion (GNU Automake)" - exit $? - ;; - - -*) - echo 1>&2 "$0: Unknown \`$1' option" - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 - ;; - -esac - -# Now exit if we have it, but it failed. Also exit now if we -# don't have it and --version was passed (most likely to detect -# the program). -case "$1" in - lex|yacc) - # Not GNU programs, they don't have --version. - ;; - - tar) - if test -n "$run"; then - echo 1>&2 "ERROR: \`tar' requires --run" - exit 1 - elif test "x$2" = "x--version" || test "x$2" = "x--help"; then - exit 1 - fi - ;; - - *) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - elif test "x$2" = "x--version" || test "x$2" = "x--help"; then - # Could not run --version or --help. 
This is probably someone - # running `$TOOL --version' or `$TOOL --help' to check whether - # $TOOL exists and not knowing $TOOL uses missing. - exit 1 - fi - ;; -esac - -# If it does not exist, or fails to run (possibly an outdated version), -# try to emulate it. -case "$1" in - aclocal*) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`acinclude.m4' or \`${configure_ac}'. You might want - to install the \`Automake' and \`Perl' packages. Grab them from - any GNU archive site." - touch aclocal.m4 - ;; - - autoconf) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`${configure_ac}'. You might want to install the - \`Autoconf' and \`GNU m4' packages. Grab them from any GNU - archive site." - touch configure - ;; - - autoheader) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`acconfig.h' or \`${configure_ac}'. You might want - to install the \`Autoconf' and \`GNU m4' packages. Grab them - from any GNU archive site." - files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` - test -z "$files" && files="config.h" - touch_files= - for f in $files; do - case "$f" in - *:*) touch_files="$touch_files "`echo "$f" | - sed -e 's/^[^:]*://' -e 's/:.*//'`;; - *) touch_files="$touch_files $f.in";; - esac - done - touch $touch_files - ;; - - automake*) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. - You might want to install the \`Automake' and \`Perl' packages. - Grab them from any GNU archive site." - find . -type f -name Makefile.am -print | - sed 's/\.am$/.in/' | - while read f; do touch "$f"; done - ;; - - autom4te) - echo 1>&2 "\ -WARNING: \`$1' is needed, but is $msg. - You might have modified some files without having the - proper tools for further handling them. - You can get \`$1' as part of \`Autoconf' from any GNU - archive site." 
- - file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'` - test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'` - if test -f "$file"; then - touch $file - else - test -z "$file" || exec >$file - echo "#! /bin/sh" - echo "# Created by GNU Automake missing as a replacement of" - echo "# $ $@" - echo "exit 0" - chmod +x $file - exit 1 - fi - ;; - - bison|yacc) - echo 1>&2 "\ -WARNING: \`$1' $msg. You should only need it if - you modified a \`.y' file. You may need the \`Bison' package - in order for those modifications to take effect. You can get - \`Bison' from any GNU archive site." - rm -f y.tab.c y.tab.h - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.y) - SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.c - fi - SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.h - fi - ;; - esac - fi - if [ ! -f y.tab.h ]; then - echo >y.tab.h - fi - if [ ! -f y.tab.c ]; then - echo 'main() { return 0; }' >y.tab.c - fi - ;; - - lex|flex) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a \`.l' file. You may need the \`Flex' package - in order for those modifications to take effect. You can get - \`Flex' from any GNU archive site." - rm -f lex.yy.c - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.l) - SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" lex.yy.c - fi - ;; - esac - fi - if [ ! -f lex.yy.c ]; then - echo 'main() { return 0; }' >lex.yy.c - fi - ;; - - help2man) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a dependency of a manual page. You may need the - \`Help2man' package in order for those modifications to take - effect. You can get \`Help2man' from any GNU archive site." 
- - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'` - fi - if [ -f "$file" ]; then - touch $file - else - test -z "$file" || exec >$file - echo ".ab help2man is required to generate this page" - exit 1 - fi - ;; - - makeinfo) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a \`.texi' or \`.texinfo' file, or any other file - indirectly affecting the aspect of the manual. The spurious - call might also be the consequence of using a buggy \`make' (AIX, - DU, IRIX). You might want to install the \`Texinfo' package or - the \`GNU make' package. Grab either from any GNU archive site." - # The file to touch is that specified with -o ... - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - # ... or it is the one specified with @setfilename ... - infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` - file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $infile` - # ... or it is derived from the source name (dir/f.texi becomes f.info) - test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info - fi - # If the file does not exist, the user really needs makeinfo; - # let's fail without touching anything. - test -f $file || exit 1 - touch $file - ;; - - tar) - shift - - # We have already tried tar in the generic part. - # Look for gnutar/gtar before invocation to avoid ugly error - # messages. - if (gnutar --version > /dev/null 2>&1); then - gnutar "$@" && exit 0 - fi - if (gtar --version > /dev/null 2>&1); then - gtar "$@" && exit 0 - fi - firstarg="$1" - if shift; then - case "$firstarg" in - *o*) - firstarg=`echo "$firstarg" | sed s/o//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - case "$firstarg" in - *h*) - firstarg=`echo "$firstarg" | sed s/h//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - fi - - echo 1>&2 "\ -WARNING: I can't seem to be able to run \`tar' with the given arguments. 
- You may want to install GNU tar or Free paxutils, or check the - command line arguments." - exit 1 - ;; - - *) - echo 1>&2 "\ -WARNING: \`$1' is needed, and is $msg. - You might have modified some files without having the - proper tools for further handling them. Check the \`README' file, - it often tells you about the needed prerequisites for installing - this package. You may also peek at any GNU archive site, in case - some other package would contain this missing \`$1' program." - exit 1 - ;; -esac - -exit 0 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/src/sparsehash-1.6/mkinstalldirs b/src/sparsehash-1.6/mkinstalldirs deleted file mode 100755 index 259dbfc..0000000 --- a/src/sparsehash-1.6/mkinstalldirs +++ /dev/null @@ -1,158 +0,0 @@ -#! /bin/sh -# mkinstalldirs --- make directory hierarchy - -scriptversion=2005-06-29.22 - -# Original author: Noah Friedman -# Created: 1993-05-16 -# Public domain. -# -# This file is maintained in Automake, please report -# bugs to or send patches to -# . - -errstatus=0 -dirmode= - -usage="\ -Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ... - -Create each directory DIR (with mode MODE, if specified), including all -leading file name components. - -Report bugs to ." - -# process command line arguments -while test $# -gt 0 ; do - case $1 in - -h | --help | --h*) # -h for help - echo "$usage" - exit $? - ;; - -m) # -m PERM arg - shift - test $# -eq 0 && { echo "$usage" 1>&2; exit 1; } - dirmode=$1 - shift - ;; - --version) - echo "$0 $scriptversion" - exit $? 
- ;; - --) # stop option processing - shift - break - ;; - -*) # unknown option - echo "$usage" 1>&2 - exit 1 - ;; - *) # first non-opt arg - break - ;; - esac -done - -for file -do - if test -d "$file"; then - shift - else - break - fi -done - -case $# in - 0) exit 0 ;; -esac - -# Solaris 8's mkdir -p isn't thread-safe. If you mkdir -p a/b and -# mkdir -p a/c at the same time, both will detect that a is missing, -# one will create a, then the other will try to create a and die with -# a "File exists" error. This is a problem when calling mkinstalldirs -# from a parallel make. We use --version in the probe to restrict -# ourselves to GNU mkdir, which is thread-safe. -case $dirmode in - '') - if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then - echo "mkdir -p -- $*" - exec mkdir -p -- "$@" - else - # On NextStep and OpenStep, the `mkdir' command does not - # recognize any option. It will interpret all options as - # directories to create, and then abort because `.' already - # exists. - test -d ./-p && rmdir ./-p - test -d ./--version && rmdir ./--version - fi - ;; - *) - if mkdir -m "$dirmode" -p --version . >/dev/null 2>&1 && - test ! -d ./--version; then - echo "mkdir -m $dirmode -p -- $*" - exec mkdir -m "$dirmode" -p -- "$@" - else - # Clean up after NextStep and OpenStep mkdir. - for d in ./-m ./-p ./--version "./$dirmode"; - do - test -d $d && rmdir $d - done - fi - ;; -esac - -for file -do - case $file in - /*) pathcomp=/ ;; - *) pathcomp= ;; - esac - oIFS=$IFS - IFS=/ - set fnord $file - shift - IFS=$oIFS - - for d - do - test "x$d" = x && continue - - pathcomp=$pathcomp$d - case $pathcomp in - -*) pathcomp=./$pathcomp ;; - esac - - if test ! -d "$pathcomp"; then - echo "mkdir $pathcomp" - - mkdir "$pathcomp" || lasterr=$? - - if test ! -d "$pathcomp"; then - errstatus=$lasterr - else - if test ! -z "$dirmode"; then - echo "chmod $dirmode $pathcomp" - lasterr= - chmod "$dirmode" "$pathcomp" || lasterr=$? - - if test ! 
-z "$lasterr"; then - errstatus=$lasterr - fi - fi - fi - fi - - pathcomp=$pathcomp/ - done -done - -exit $errstatus - -# Local Variables: -# mode: shell-script -# sh-indentation: 2 -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/src/sparsehash-1.6/packages/deb.sh b/src/sparsehash-1.6/packages/deb.sh deleted file mode 100755 index 31b423c..0000000 --- a/src/sparsehash-1.6/packages/deb.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash -e - -# This takes one commandline argument, the name of the package. If no -# name is given, then we'll end up just using the name associated with -# an arbitrary .tar.gz file in the rootdir. That's fine: there's probably -# only one. -# -# Run this from the 'packages' directory, just under rootdir - -## Set LIB to lib if exporting a library, empty-string else -LIB= -#LIB=lib - -PACKAGE="$1" -VERSION="$2" - -# We can only build Debian packages, if the Debian build tools are installed -if [ \! -x /usr/bin/debuild ]; then - echo "Cannot find /usr/bin/debuild. Not building Debian packages." 1>&2 - exit 0 -fi - -# Double-check we're in the packages directory, just under rootdir -if [ \! -r ../Makefile -a \! -r ../INSTALL ]; then - echo "Must run $0 in the 'packages' directory, under the root directory." 1>&2 - echo "Also, you must run \"make dist\" before running this script." 1>&2 - exit 0 -fi - -# Find the top directory for this package -topdir="${PWD%/*}" - -# Find the tar archive built by "make dist" -archive="${PACKAGE}-${VERSION}" -archive_with_underscore="${PACKAGE}_${VERSION}" -if [ -z "${archive}" ]; then - echo "Cannot find ../$PACKAGE*.tar.gz. Run \"make dist\" first." 1>&2 - exit 0 -fi - -# Create a pristine directory for building the Debian package files -trap 'rm -rf '`pwd`/tmp'; exit $?' 
EXIT SIGHUP SIGINT SIGTERM - -rm -rf tmp -mkdir -p tmp -cd tmp - -# Debian has very specific requirements about the naming of build -# directories, and tar archives. It also wants to write all generated -# packages to the parent of the source directory. We accommodate these -# requirements by building directly from the tar file. -ln -s "${topdir}/${archive}.tar.gz" "${LIB}${archive}.orig.tar.gz" -# Some version of debuilder want foo.orig.tar.gz with _ between versions. -ln -s "${topdir}/${archive}.tar.gz" "${LIB}${archive_with_underscore}.orig.tar.gz" -tar zfx "${LIB}${archive}.orig.tar.gz" -[ -n "${LIB}" ] && mv "${archive}" "${LIB}${archive}" -cd "${LIB}${archive}" -# This is one of those 'specific requirements': where the deb control files live -cp -a "packages/deb" "debian" - -# Now, we can call Debian's standard build tool -debuild -uc -us -cd ../.. # get back to the original top-level dir - -# We'll put the result in a subdirectory that's named after the OS version -# we've made this .deb file for. -destdir="debian-$(cat /etc/debian_version 2>/dev/null || echo UNKNOWN)" - -rm -rf "$destdir" -mkdir -p "$destdir" -mv $(find tmp -mindepth 1 -maxdepth 1 -type f) "$destdir" - -echo -echo "The Debian package files are located in $PWD/$destdir" diff --git a/src/sparsehash-1.6/packages/deb/README b/src/sparsehash-1.6/packages/deb/README deleted file mode 100644 index 57becfd..0000000 --- a/src/sparsehash-1.6/packages/deb/README +++ /dev/null @@ -1,7 +0,0 @@ -The list of files here isn't complete. For a step-by-step guide on -how to set this package up correctly, check out - http://www.debian.org/doc/maint-guide/ - -Most of the files that are in this directory are boilerplate. -However, you may need to change the list of binary-arch dependencies -in 'rules'. 
diff --git a/src/sparsehash-1.6/packages/deb/changelog b/src/sparsehash-1.6/packages/deb/changelog deleted file mode 100644 index 7786c37..0000000 --- a/src/sparsehash-1.6/packages/deb/changelog +++ /dev/null @@ -1,113 +0,0 @@ -sparsehash (1.6-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Fri, 08 Jan 2010 14:47:55 -0800 - -sparsehash (1.5.2-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Tue, 12 May 2009 14:16:38 -0700 - -sparsehash (1.5.1-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Fri, 08 May 2009 15:23:44 -0700 - -sparsehash (1.5-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Wed, 06 May 2009 11:28:49 -0700 - -sparsehash (1.4-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Wed, 28 Jan 2009 17:11:31 -0800 - -sparsehash (1.3-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Thu, 06 Nov 2008 15:06:09 -0800 - -sparsehash (1.2-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Thu, 18 Sep 2008 13:53:20 -0700 - -sparsehash (1.1-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Mon, 11 Feb 2008 16:30:11 -0800 - -sparsehash (1.0-1) unstable; urgency=low - - * New upstream release. We are now out of beta. - - -- Google Inc. Tue, 13 Nov 2007 15:15:46 -0800 - -sparsehash (0.9.1-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Fri, 12 Oct 2007 12:35:24 -0700 - -sparsehash (0.9-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Tue, 09 Oct 2007 14:15:21 -0700 - -sparsehash (0.8-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Tue, 03 Jul 2007 12:55:04 -0700 - -sparsehash (0.7-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Mon, 11 Jun 2007 11:33:41 -0700 - -sparsehash (0.6-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. 
Tue, 20 Mar 2007 17:29:34 -0700 - -sparsehash (0.5-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Sat, 21 Oct 2006 13:47:47 -0700 - -sparsehash (0.4-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Sun, 23 Apr 2006 22:42:35 -0700 - -sparsehash (0.3-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Thu, 03 Nov 2005 20:12:31 -0800 - -sparsehash (0.2-1) unstable; urgency=low - - * New upstream release. - - -- Google Inc. Mon, 02 May 2005 07:04:46 -0700 - -sparsehash (0.1-1) unstable; urgency=low - - * Initial release. - - -- Google Inc. Tue, 15 Feb 2005 07:17:02 -0800 diff --git a/src/sparsehash-1.6/packages/deb/compat b/src/sparsehash-1.6/packages/deb/compat deleted file mode 100644 index b8626c4..0000000 --- a/src/sparsehash-1.6/packages/deb/compat +++ /dev/null @@ -1 +0,0 @@ -4 diff --git a/src/sparsehash-1.6/packages/deb/control b/src/sparsehash-1.6/packages/deb/control deleted file mode 100644 index c387952..0000000 --- a/src/sparsehash-1.6/packages/deb/control +++ /dev/null @@ -1,17 +0,0 @@ -Source: sparsehash -Section: libdevel -Priority: optional -Maintainer: Google Inc. -Build-Depends: debhelper (>= 4.0.0) -Standards-Version: 3.6.1 - -Package: sparsehash -Section: libs -Architecture: any -Description: hash_map and hash_set classes with minimal space overhead - This package contains several hash-map implementations, similar - in API to SGI's hash_map class, but with different performance - characteristics. sparse_hash_map uses very little space overhead: 1-2 - bits per entry. dense_hash_map is typically faster than the default - SGI STL implementation. This package also includes hash-set analogues - of these classes. diff --git a/src/sparsehash-1.6/packages/deb/copyright b/src/sparsehash-1.6/packages/deb/copyright deleted file mode 100644 index 725a37d..0000000 --- a/src/sparsehash-1.6/packages/deb/copyright +++ /dev/null @@ -1,35 +0,0 @@ -This package was debianized by Google Inc. 
on -15 February 2005. - -It was downloaded from http://code.google.com/ - -Upstream Author: opensource@google.com - -Copyright (c) 2005, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/src/sparsehash-1.6/packages/deb/docs b/src/sparsehash-1.6/packages/deb/docs deleted file mode 100644 index 752adb4..0000000 --- a/src/sparsehash-1.6/packages/deb/docs +++ /dev/null @@ -1,16 +0,0 @@ -AUTHORS -COPYING -ChangeLog -INSTALL -NEWS -README -TODO -doc/dense_hash_map.html -doc/dense_hash_set.html -doc/sparse_hash_map.html -doc/sparse_hash_set.html -doc/sparsetable.html -doc/implementation.html -doc/performance.html -doc/index.html -doc/designstyle.css diff --git a/src/sparsehash-1.6/packages/deb/rules b/src/sparsehash-1.6/packages/deb/rules deleted file mode 100755 index f520bef..0000000 --- a/src/sparsehash-1.6/packages/deb/rules +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/make -f -# -*- makefile -*- -# Sample debian/rules that uses debhelper. -# This file was originally written by Joey Hess and Craig Small. -# As a special exception, when this file is copied by dh-make into a -# dh-make output file, you may use that output file without restriction. -# This special exception was added by Craig Small in version 0.37 of dh-make. - -# Uncomment this to turn on verbose mode. 
-#export DH_VERBOSE=1 - - -# These are used for cross-compiling and for saving the configure script -# from having to guess our platform (since we know it already) -DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) -DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) - - -CFLAGS = -Wall -g - -ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS))) - CFLAGS += -O0 -else - CFLAGS += -O2 -endif -ifeq (,$(findstring nostrip,$(DEB_BUILD_OPTIONS))) - INSTALL_PROGRAM += -s -endif - -# shared library versions, option 1 -#version=2.0.5 -#major=2 -# option 2, assuming the library is created as src/.libs/libfoo.so.2.0.5 or so -version=`ls src/.libs/lib*.so.* | \ - awk '{if (match($$0,/[0-9]+\.[0-9]+\.[0-9]+$$/)) print substr($$0,RSTART)}'` -major=`ls src/.libs/lib*.so.* | \ - awk '{if (match($$0,/\.so\.[0-9]+$$/)) print substr($$0,RSTART+4)}'` - -config.status: configure - dh_testdir - # Add here commands to configure the package. - CFLAGS="$(CFLAGS)" ./configure --host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr --mandir=\$${prefix}/share/man --infodir=\$${prefix}/share/info - - -build: build-stamp -build-stamp: config.status - dh_testdir - - # Add here commands to compile the package. - $(MAKE) - - touch build-stamp - -clean: - dh_testdir - dh_testroot - rm -f build-stamp - - # Add here commands to clean up after the build process. - -$(MAKE) distclean -ifneq "$(wildcard /usr/share/misc/config.sub)" "" - cp -f /usr/share/misc/config.sub config.sub -endif -ifneq "$(wildcard /usr/share/misc/config.guess)" "" - cp -f /usr/share/misc/config.guess config.guess -endif - - - dh_clean - -install: build - dh_testdir - dh_testroot - dh_clean -k - dh_installdirs - - # Add here commands to install the package into debian/tmp - $(MAKE) install DESTDIR=$(CURDIR)/debian/tmp - - -# Build architecture-independent files here. -binary-indep: build install -# We have nothing to do by default. - -# Build architecture-dependent files here. 
-binary-arch: build install - dh_testdir - dh_testroot - dh_installchangelogs ChangeLog - dh_installdocs - dh_installexamples - dh_install --sourcedir=debian/tmp -# dh_installmenu -# dh_installdebconf -# dh_installlogrotate -# dh_installemacsen -# dh_installpam -# dh_installmime -# dh_installinit -# dh_installcron -# dh_installinfo - dh_installman - dh_link - dh_strip - dh_compress - dh_fixperms -# dh_perl -# dh_python - dh_makeshlibs - dh_installdeb - dh_shlibdeps - dh_gencontrol - dh_md5sums - dh_builddeb - -binary: binary-indep binary-arch -.PHONY: build clean binary-indep binary-arch binary install diff --git a/src/sparsehash-1.6/packages/deb/sparsehash.dirs b/src/sparsehash-1.6/packages/deb/sparsehash.dirs deleted file mode 100644 index 4f17144..0000000 --- a/src/sparsehash-1.6/packages/deb/sparsehash.dirs +++ /dev/null @@ -1,2 +0,0 @@ -usr/include -usr/include/google diff --git a/src/sparsehash-1.6/packages/deb/sparsehash.install b/src/sparsehash-1.6/packages/deb/sparsehash.install deleted file mode 100644 index 9625860..0000000 --- a/src/sparsehash-1.6/packages/deb/sparsehash.install +++ /dev/null @@ -1,2 +0,0 @@ -usr/include/google/* -debian/tmp/usr/include/google/* diff --git a/src/sparsehash-1.6/packages/rpm.sh b/src/sparsehash-1.6/packages/rpm.sh deleted file mode 100755 index a655080..0000000 --- a/src/sparsehash-1.6/packages/rpm.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/sh -e - -# Run this from the 'packages' directory, just under rootdir - -# We can only build rpm packages, if the rpm build tools are installed -if [ \! -x /usr/bin/rpmbuild ] -then - echo "Cannot find /usr/bin/rpmbuild. Not building an rpm." 1>&2 - exit 0 -fi - -# Check the commandline flags -PACKAGE="$1" -VERSION="$2" -fullname="${PACKAGE}-${VERSION}" -archive=../$fullname.tar.gz - -if [ -z "$1" -o -z "$2" ] -then - echo "Usage: $0 " 1>&2 - exit 0 -fi - -# Double-check we're in the packages directory, just under rootdir -if [ \! -r ../Makefile -a \! 
-r ../INSTALL ] -then - echo "Must run $0 in the 'packages' directory, under the root directory." 1>&2 - echo "Also, you must run \"make dist\" before running this script." 1>&2 - exit 0 -fi - -if [ \! -r "$archive" ] -then - echo "Cannot find $archive. Run \"make dist\" first." 1>&2 - exit 0 -fi - -# Create the directory where the input lives, and where the output should live -RPM_SOURCE_DIR="/tmp/rpmsource-$fullname" -RPM_BUILD_DIR="/tmp/rpmbuild-$fullname" - -trap 'rm -rf $RPM_SOURCE_DIR $RPM_BUILD_DIR; exit $?' EXIT SIGHUP SIGINT SIGTERM - -rm -rf "$RPM_SOURCE_DIR" "$RPM_BUILD_DIR" -mkdir "$RPM_SOURCE_DIR" -mkdir "$RPM_BUILD_DIR" - -cp "$archive" "$RPM_SOURCE_DIR" - -# rpmbuild -- as far as I can tell -- asks the OS what CPU it has. -# This may differ from what kind of binaries gcc produces. dpkg -# does a better job of this, so if we can run 'dpkg --print-architecture' -# to get the build CPU, we use that in preference of the rpmbuild -# default. -target=`dpkg --print-architecture 2>/dev/null` # "" if dpkg isn't found -if [ -n "$target" ] -then - target=" --target $target" -fi - -rpmbuild -bb rpm/rpm.spec $target \ - --define "NAME $PACKAGE" \ - --define "VERSION $VERSION" \ - --define "_sourcedir $RPM_SOURCE_DIR" \ - --define "_builddir $RPM_BUILD_DIR" \ - --define "_rpmdir $RPM_SOURCE_DIR" - -# We put the output in a directory based on what system we've built for -destdir=rpm-unknown -if [ -r /etc/issue ] -then - grep "Red Hat.*release 7" /etc/issue >/dev/null 2>&1 && destdir=rh7 - grep "Red Hat.*release 8" /etc/issue >/dev/null 2>&1 && destdir=rh8 - grep "Red Hat.*release 9" /etc/issue >/dev/null 2>&1 && destdir=rh9 - grep "Fedora Core.*release 1" /etc/issue >/dev/null 2>&1 && destdir=fc1 - grep "Fedora Core.*release 2" /etc/issue >/dev/null 2>&1 && destdir=fc2 - grep "Fedora Core.*release 3" /etc/issue >/dev/null 2>&1 && destdir=fc3 -fi - -rm -rf "$destdir" -mkdir -p "$destdir" -# We want to get not only the main package but devel etc, hence the middle * 
-mv "$RPM_SOURCE_DIR"/*/"${PACKAGE}"-*"${VERSION}"*.rpm "$destdir" - -echo -echo "The rpm package file(s) are located in $PWD/$destdir" diff --git a/src/sparsehash-1.6/packages/rpm/rpm.spec b/src/sparsehash-1.6/packages/rpm/rpm.spec deleted file mode 100644 index f412efe..0000000 --- a/src/sparsehash-1.6/packages/rpm/rpm.spec +++ /dev/null @@ -1,61 +0,0 @@ -%define RELEASE 1 -%define rel %{?CUSTOM_RELEASE} %{!?CUSTOM_RELEASE:%RELEASE} -%define prefix /usr - -Name: %NAME -Summary: hash_map and hash_set classes with minimal space overhead -Version: %VERSION -Release: %rel -Group: Development/Libraries -URL: http://code.google.com/p/google-sparsehash -License: BSD -Vendor: Google -Packager: Google -Source: http://%{NAME}.googlecode.com/files/%{NAME}-%{VERSION}.tar.gz -Distribution: Redhat 7 and above. -Buildroot: %{_tmppath}/%{name}-root -Prefix: %prefix -Buildarch: noarch - -%description -The %name package contains several hash-map implementations, similar -in API to the SGI hash_map class, but with different performance -characteristics. sparse_hash_map uses very little space overhead: 1-2 -bits per entry. dense_hash_map is typically faster than the default -SGI STL implementation. This package also includes hash-set analogues -of these classes. - -%changelog - * Wed Apr 22 2009 - - Change build rule to use %configure instead of ./configure - - Change install to use DESTDIR instead of prefix for make install - - Use wildcards for doc/ and lib/ directories - - Use {_includedir} instead of {prefix}/include - - * Fri Jan 14 2005 - - First draft - -%prep -%setup - -%build -# I can't use '% configure', because it defines -m32 which breaks on -# my development environment for some reason. But I do take -# as much from % configure (in /usr/lib/rpm/macros) as I can. 
-./configure --prefix=%{_prefix} --exec-prefix=%{_exec_prefix} --bindir=%{_bindir} --sbindir=%{_sbindir} --sysconfdir=%{_sysconfdir} --datadir=%{_datadir} --includedir=%{_includedir} --libdir=%{_libdir} --libexecdir=%{_libexecdir} --localstatedir=%{_localstatedir} --sharedstatedir=%{_sharedstatedir} --mandir=%{_mandir} --infodir=%{_infodir} -make - -%install -rm -rf $RPM_BUILD_ROOT -make DESTDIR=$RPM_BUILD_ROOT install - -%clean -rm -rf $RPM_BUILD_ROOT - -%files -%defattr(-,root,root) - -%docdir %{prefix}/share/doc/%{NAME}-%{VERSION} -%{prefix}/share/doc/%{NAME}-%{VERSION}/* - -%{_includedir}/google diff --git a/src/sparsehash-1.6/simple_test b/src/sparsehash-1.6/simple_test deleted file mode 100755 index e2b9a84..0000000 Binary files a/src/sparsehash-1.6/simple_test and /dev/null differ diff --git a/src/sparsehash-1.6/sparsetable_unittest b/src/sparsehash-1.6/sparsetable_unittest deleted file mode 100755 index 39eeecf..0000000 Binary files a/src/sparsehash-1.6/sparsetable_unittest and /dev/null differ diff --git a/src/sparsehash-1.6/src/config.h b/src/sparsehash-1.6/src/config.h deleted file mode 100644 index 4183665..0000000 --- a/src/sparsehash-1.6/src/config.h +++ /dev/null @@ -1,132 +0,0 @@ -/* src/config.h. Generated from config.h.in by configure. */ -/* src/config.h.in. Generated from configure.ac by autoheader. */ - -/* Namespace for Google classes */ -#define GOOGLE_NAMESPACE ::google - -/* the location of the header defining hash functions */ -#define HASH_FUN_H - -/* the location of or */ -#define HASH_MAP_H - -/* the namespace of the hash<> function */ -#define HASH_NAMESPACE std::tr1 - -/* the location of or */ -#define HASH_SET_H - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_GOOGLE_MALLOC_EXTENSION_H */ - -/* define if the compiler has hash_map */ -#define HAVE_HASH_MAP 1 - -/* define if the compiler has hash_set */ -#define HAVE_HASH_SET 1 - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if the system has the type `long long'. */ -#define HAVE_LONG_LONG 1 - -/* Define to 1 if you have the `memcpy' function. */ -#define HAVE_MEMCPY 1 - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* define if the compiler implements namespaces */ -#define HAVE_NAMESPACES 1 - -/* Define if you have POSIX threads libraries and header files. */ -/* #undef HAVE_PTHREAD */ - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_RESOURCE_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TIME_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_UTSNAME_H 1 - -/* Define to 1 if the system has the type `uint16_t'. */ -#define HAVE_UINT16_T 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* define if the compiler supports unordered_{map,set} */ -#define HAVE_UNORDERED_MAP 1 - -/* Define to 1 if the system has the type `u_int16_t'. */ -#define HAVE_U_INT16_T 1 - -/* Define to 1 if the system has the type `__uint16'. */ -/* #undef HAVE___UINT16 */ - -/* Name of package */ -#define PACKAGE "sparsehash" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "opensource@google.com" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "sparsehash" - -/* Define to the full name and version of this package. 
*/ -#define PACKAGE_STRING "sparsehash 1.6" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "sparsehash" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "1.6" - -/* Define to necessary symbol if this constant uses a non-standard name on - your system. */ -/* #undef PTHREAD_CREATE_JOINABLE */ - -/* The system-provided hash function including the namespace. */ -#define SPARSEHASH_HASH HASH_NAMESPACE::hash - -/* The system-provided hash function, in namespace HASH_NAMESPACE. */ -#define SPARSEHASH_HASH_NO_NAMESPACE hash - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* the namespace where STL code like vector<> is defined */ -#define STL_NAMESPACE std - -/* Version number of package */ -#define VERSION "1.6" - -/* Stops putting the code inside the Google namespace */ -#define _END_GOOGLE_NAMESPACE_ } - -/* Puts following code inside the Google namespace */ -#define _START_GOOGLE_NAMESPACE_ namespace google { diff --git a/src/sparsehash-1.6/src/config.h.in b/src/sparsehash-1.6/src/config.h.in deleted file mode 100644 index bef1650..0000000 --- a/src/sparsehash-1.6/src/config.h.in +++ /dev/null @@ -1,131 +0,0 @@ -/* src/config.h.in. Generated from configure.ac by autoheader. */ - -/* Namespace for Google classes */ -#undef GOOGLE_NAMESPACE - -/* the location of the header defining hash functions */ -#undef HASH_FUN_H - -/* the location of or */ -#undef HASH_MAP_H - -/* the namespace of the hash<> function */ -#undef HASH_NAMESPACE - -/* the location of or */ -#undef HASH_SET_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_GOOGLE_MALLOC_EXTENSION_H - -/* define if the compiler has hash_map */ -#undef HAVE_HASH_MAP - -/* define if the compiler has hash_set */ -#undef HAVE_HASH_SET - -/* Define to 1 if you have the header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if the system has the type `long long'. 
*/ -#undef HAVE_LONG_LONG - -/* Define to 1 if you have the `memcpy' function. */ -#undef HAVE_MEMCPY - -/* Define to 1 if you have the `memmove' function. */ -#undef HAVE_MEMMOVE - -/* Define to 1 if you have the header file. */ -#undef HAVE_MEMORY_H - -/* define if the compiler implements namespaces */ -#undef HAVE_NAMESPACES - -/* Define if you have POSIX threads libraries and header files. */ -#undef HAVE_PTHREAD - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_RESOURCE_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TIME_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_UTSNAME_H - -/* Define to 1 if the system has the type `uint16_t'. */ -#undef HAVE_UINT16_T - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* define if the compiler supports unordered_{map,set} */ -#undef HAVE_UNORDERED_MAP - -/* Define to 1 if the system has the type `u_int16_t'. */ -#undef HAVE_U_INT16_T - -/* Define to 1 if the system has the type `__uint16'. */ -#undef HAVE___UINT16 - -/* Name of package */ -#undef PACKAGE - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the version of this package. 
*/ -#undef PACKAGE_VERSION - -/* Define to necessary symbol if this constant uses a non-standard name on - your system. */ -#undef PTHREAD_CREATE_JOINABLE - -/* The system-provided hash function including the namespace. */ -#undef SPARSEHASH_HASH - -/* The system-provided hash function, in namespace HASH_NAMESPACE. */ -#undef SPARSEHASH_HASH_NO_NAMESPACE - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* the namespace where STL code like vector<> is defined */ -#undef STL_NAMESPACE - -/* Version number of package */ -#undef VERSION - -/* Stops putting the code inside the Google namespace */ -#undef _END_GOOGLE_NAMESPACE_ - -/* Puts following code inside the Google namespace */ -#undef _START_GOOGLE_NAMESPACE_ diff --git a/src/sparsehash-1.6/src/config.h.include b/src/sparsehash-1.6/src/config.h.include deleted file mode 100644 index 5b0e3a2..0000000 --- a/src/sparsehash-1.6/src/config.h.include +++ /dev/null @@ -1,23 +0,0 @@ -/*** - *** These are #defines that autoheader puts in config.h.in that we - *** want to show up in sparseconfig.h, the minimal config.h file - *** #included by all our .h files. The reason we don't take - *** everything that autoheader emits is that we have to include a - *** config.h in installed header files, and we want to minimize the - *** number of #defines we make so as to not pollute the namespace. - ***/ -GOOGLE_NAMESPACE -HASH_NAMESPACE -HASH_FUN_H -SPARSEHASH_HASH -HAVE_UINT16_T -HAVE_U_INT16_T -HAVE___UINT16 -HAVE_LONG_LONG -HAVE_SYS_TYPES_H -HAVE_STDINT_H -HAVE_INTTYPES_H -HAVE_MEMCPY -STL_NAMESPACE -_END_GOOGLE_NAMESPACE_ -_START_GOOGLE_NAMESPACE_ diff --git a/src/sparsehash-1.6/src/google/dense_hash_map b/src/sparsehash-1.6/src/google/dense_hash_map deleted file mode 100644 index 888643a..0000000 --- a/src/sparsehash-1.6/src/google/dense_hash_map +++ /dev/null @@ -1,319 +0,0 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// ---- -// Author: Craig Silverstein -// -// This is just a very thin wrapper over densehashtable.h, just -// like sgi stl's stl_hash_map is a very thin wrapper over -// stl_hashtable. The major thing we define is operator[], because -// we have a concept of a data_type which stl_hashtable doesn't -// (it only has a key and a value). 
-// -// NOTE: this is exactly like sparse_hash_map.h, with the word -// "sparse" replaced by "dense", except for the addition of -// set_empty_key(). -// -// YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION. -// -// Otherwise your program will die in mysterious ways. -// -// In other respects, we adhere mostly to the STL semantics for -// hash-map. One important exception is that insert() may invalidate -// iterators entirely -- STL semantics are that insert() may reorder -// iterators, but they all still refer to something valid in the -// hashtable. Not so for us. Likewise, insert() may invalidate -// pointers into the hashtable. (Whether insert invalidates iterators -// and pointers depends on whether it results in a hashtable resize). -// On the plus side, delete() doesn't invalidate iterators or pointers -// at all, or even change the ordering of elements. -// -// Here are a few "power user" tips: -// -// 1) set_deleted_key(): -// If you want to use erase() you *must* call set_deleted_key(), -// in addition to set_empty_key(), after construction. -// The deleted and empty keys must differ. -// -// 2) resize(0): -// When an item is deleted, its memory isn't freed right -// away. This allows you to iterate over a hashtable, -// and call erase(), without invalidating the iterator. -// To force the memory to be freed, call resize(0). -// For tr1 compatibility, this can also be called as rehash(0). -// -// 3) min_load_factor(0.0) -// Setting the minimum load factor to 0.0 guarantees that -// the hash table will never shrink. -// -// Roughly speaking: -// (1) dense_hash_map: fastest, uses the most memory unless entries are small -// (2) sparse_hash_map: slowest, uses the least memory -// (3) hash_map / unordered_map (STL): in the middle -// -// Typically I use sparse_hash_map when I care about space and/or when -// I need to save the hashtable on disk. I use hash_map otherwise. 
I -// don't personally use dense_hash_set ever; some people use it for -// small sets with lots of lookups. -// -// - dense_hash_map has, typically, about 78% memory overhead (if your -// data takes up X bytes, the hash_map uses .78X more bytes in overhead). -// - sparse_hash_map has about 4 bits overhead per entry. -// - sparse_hash_map can be 3-7 times slower than the others for lookup and, -// especially, inserts. See time_hash_map.cc for details. -// -// See /usr/(local/)?doc/sparsehash-*/dense_hash_map.html -// for information about how to use this class. - -#ifndef _DENSE_HASH_MAP_H_ -#define _DENSE_HASH_MAP_H_ - -#include -#include // for FILE * in read()/write() -#include // for the default template args -#include // for equal_to -#include // for alloc<> -#include // for pair<> -#include HASH_FUN_H // defined in config.h -#include - - -_START_GOOGLE_NAMESPACE_ - -using STL_NAMESPACE::pair; - -template , // defined in sparseconfig.h - class EqualKey = STL_NAMESPACE::equal_to, - class Alloc = STL_NAMESPACE::allocator > -class dense_hash_map { - private: - // Apparently select1st is not stl-standard, so we define our own - struct SelectKey { - const Key& operator()(const pair& p) const { - return p.first; - } - }; - struct SetKey { - void operator()(pair* value, const Key& new_key) const { - *const_cast(&value->first) = new_key; - // It would be nice to clear the rest of value here as well, in - // case it's taking up a lot of memory. We do this by clearing - // the value. This assumes T has a zero-arg constructor! 
- value->second = T(); - } - }; - - // The actual data - typedef dense_hashtable, Key, HashFcn, - SelectKey, SetKey, EqualKey, Alloc> ht; - ht rep; - - public: - typedef typename ht::key_type key_type; - typedef T data_type; - typedef T mapped_type; - typedef typename ht::value_type value_type; - typedef typename ht::hasher hasher; - typedef typename ht::key_equal key_equal; - typedef Alloc allocator_type; - - typedef typename ht::size_type size_type; - typedef typename ht::difference_type difference_type; - typedef typename ht::pointer pointer; - typedef typename ht::const_pointer const_pointer; - typedef typename ht::reference reference; - typedef typename ht::const_reference const_reference; - - typedef typename ht::iterator iterator; - typedef typename ht::const_iterator const_iterator; - typedef typename ht::local_iterator local_iterator; - typedef typename ht::const_local_iterator const_local_iterator; - - // Iterator functions - iterator begin() { return rep.begin(); } - iterator end() { return rep.end(); } - const_iterator begin() const { return rep.begin(); } - const_iterator end() const { return rep.end(); } - - - // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. 
- local_iterator begin(size_type i) { return rep.begin(i); } - local_iterator end(size_type i) { return rep.end(i); } - const_local_iterator begin(size_type i) const { return rep.begin(i); } - const_local_iterator end(size_type i) const { return rep.end(i); } - - // Accessor functions - // TODO(csilvers): implement Alloc get_allocator() const; - hasher hash_funct() const { return rep.hash_funct(); } - hasher hash_function() const { return hash_funct(); } - key_equal key_eq() const { return rep.key_eq(); } - - - // Constructors - explicit dense_hash_map(size_type expected_max_items_in_table = 0, - const hasher& hf = hasher(), - const key_equal& eql = key_equal()) - : rep(expected_max_items_in_table, hf, eql) { } - - template - dense_hash_map(InputIterator f, InputIterator l, - size_type expected_max_items_in_table = 0, - const hasher& hf = hasher(), - const key_equal& eql = key_equal()) - : rep(expected_max_items_in_table, hf, eql) { - rep.insert(f, l); - } - // We use the default copy constructor - // We use the default operator=() - // We use the default destructor - - void clear() { rep.clear(); } - // This clears the hash map without resizing it down to the minimum - // bucket count, but rather keeps the number of buckets constant - void clear_no_resize() { rep.clear_no_resize(); } - void swap(dense_hash_map& hs) { rep.swap(hs.rep); } - - - // Functions concerning size - size_type size() const { return rep.size(); } - size_type max_size() const { return rep.max_size(); } - bool empty() const { return rep.empty(); } - size_type bucket_count() const { return rep.bucket_count(); } - size_type max_bucket_count() const { return rep.max_bucket_count(); } - - // These are tr1 methods. bucket() is the bucket the key is or would be in. 
- size_type bucket_size(size_type i) const { return rep.bucket_size(i); } - size_type bucket(const key_type& key) const { return rep.bucket(key); } - float load_factor() const { - return size() * 1.0f / bucket_count(); - } - float max_load_factor() const { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - return grow; - } - void max_load_factor(float new_grow) { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - rep.set_resizing_parameters(shrink, new_grow); - } - // These aren't tr1 methods but perhaps ought to be. - float min_load_factor() const { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - return shrink; - } - void min_load_factor(float new_shrink) { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - rep.set_resizing_parameters(new_shrink, grow); - } - // Deprecated; use min_load_factor() or max_load_factor() instead. - void set_resizing_parameters(float shrink, float grow) { - return rep.set_resizing_parameters(shrink, grow); - } - - void resize(size_type hint) { rep.resize(hint); } - void rehash(size_type hint) { resize(hint); } // the tr1 name - - // Lookup routines - iterator find(const key_type& key) { return rep.find(key); } - const_iterator find(const key_type& key) const { return rep.find(key); } - - data_type& operator[](const key_type& key) { // This is our value-add! 
- iterator it = find(key); - if (it != end()) { - return it->second; - } else { - return insert(value_type(key, data_type())).first->second; - } - } - - size_type count(const key_type& key) const { return rep.count(key); } - - pair equal_range(const key_type& key) { - return rep.equal_range(key); - } - pair equal_range(const key_type& key) const { - return rep.equal_range(key); - } - - // Insertion routines - pair insert(const value_type& obj) { return rep.insert(obj); } - template - void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } - void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } - // required for std::insert_iterator; the passed-in iterator is ignored - iterator insert(iterator, const value_type& obj) { return insert(obj).first; } - - - // Deletion and empty routines - // THESE ARE NON-STANDARD! I make you specify an "impossible" key - // value to identify deleted and empty buckets. You can change the - // deleted key as time goes on, or get rid of it entirely to be insert-only. - void set_empty_key(const key_type& key) { // YOU MUST CALL THIS! 
- rep.set_empty_key(value_type(key, data_type())); // rep wants a value - } - key_type empty_key() const { - return rep.empty_key().first; // rep returns a value - } - - void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } - void clear_deleted_key() { rep.clear_deleted_key(); } - key_type deleted_key() const { return rep.deleted_key(); } - - // These are standard - size_type erase(const key_type& key) { return rep.erase(key); } - void erase(iterator it) { rep.erase(it); } - void erase(iterator f, iterator l) { rep.erase(f, l); } - - - // Comparison - bool operator==(const dense_hash_map& hs) const { return rep == hs.rep; } - bool operator!=(const dense_hash_map& hs) const { return rep != hs.rep; } - - - // I/O -- this is an add-on for writing metainformation to disk - bool write_metadata(FILE *fp) { return rep.write_metadata(fp); } - bool read_metadata(FILE *fp) { return rep.read_metadata(fp); } - bool write_nopointer_data(FILE *fp) { return rep.write_nopointer_data(fp); } - bool read_nopointer_data(FILE *fp) { return rep.read_nopointer_data(fp); } -}; - -// We need a global swap as well -template -inline void swap(dense_hash_map& hm1, - dense_hash_map& hm2) { - hm1.swap(hm2); -} - -_END_GOOGLE_NAMESPACE_ - -#endif /* _DENSE_HASH_MAP_H_ */ diff --git a/src/sparsehash-1.6/src/google/dense_hash_set b/src/sparsehash-1.6/src/google/dense_hash_set deleted file mode 100644 index 75be3e3..0000000 --- a/src/sparsehash-1.6/src/google/dense_hash_set +++ /dev/null @@ -1,296 +0,0 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// This is just a very thin wrapper over densehashtable.h, just -// like sgi stl's stl_hash_set is a very thin wrapper over -// stl_hashtable. The major thing we define is operator[], because -// we have a concept of a data_type which stl_hashtable doesn't -// (it only has a key and a value). -// -// This is more different from dense_hash_map than you might think, -// because all iterators for sets are const (you obviously can't -// change the key, and for sets there is no value). -// -// NOTE: this is exactly like sparse_hash_set.h, with the word -// "sparse" replaced by "dense", except for the addition of -// set_empty_key(). -// -// YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION. 
-// -// Otherwise your program will die in mysterious ways. -// -// In other respects, we adhere mostly to the STL semantics for -// hash-map. One important exception is that insert() may invalidate -// iterators entirely -- STL semantics are that insert() may reorder -// iterators, but they all still refer to something valid in the -// hashtable. Not so for us. Likewise, insert() may invalidate -// pointers into the hashtable. (Whether insert invalidates iterators -// and pointers depends on whether it results in a hashtable resize). -// On the plus side, delete() doesn't invalidate iterators or pointers -// at all, or even change the ordering of elements. -// -// Here are a few "power user" tips: -// -// 1) set_deleted_key(): -// If you want to use erase() you must call set_deleted_key(), -// in addition to set_empty_key(), after construction. -// The deleted and empty keys must differ. -// -// 2) resize(0): -// When an item is deleted, its memory isn't freed right -// away. This allows you to iterate over a hashtable, -// and call erase(), without invalidating the iterator. -// To force the memory to be freed, call resize(0). -// For tr1 compatibility, this can also be called as rehash(0). -// -// 3) min_load_factor(0.0) -// Setting the minimum load factor to 0.0 guarantees that -// the hash table will never shrink. -// -// Roughly speaking: -// (1) dense_hash_set: fastest, uses the most memory unless entries are small -// (2) sparse_hash_set: slowest, uses the least memory -// (3) hash_set / unordered_set (STL): in the middle -// -// Typically I use sparse_hash_set when I care about space and/or when -// I need to save the hashtable on disk. I use hash_set otherwise. I -// don't personally use dense_hash_set ever; some people use it for -// small sets with lots of lookups. -// -// - dense_hash_set has, typically, about 78% memory overhead (if your -// data takes up X bytes, the hash_set uses .78X more bytes in overhead). 
-// - sparse_hash_set has about 4 bits overhead per entry. -// - sparse_hash_set can be 3-7 times slower than the others for lookup and, -// especially, inserts. See time_hash_map.cc for details. -// -// See /usr/(local/)?doc/sparsehash-*/dense_hash_set.html -// for information about how to use this class. - -#ifndef _DENSE_HASH_SET_H_ -#define _DENSE_HASH_SET_H_ - -#include -#include // for FILE * in read()/write() -#include // for the default template args -#include // for equal_to -#include // for alloc<> -#include // for pair<> -#include HASH_FUN_H // defined in config.h -#include - - -_START_GOOGLE_NAMESPACE_ - -using STL_NAMESPACE::pair; - -template , // defined in sparseconfig.h - class EqualKey = STL_NAMESPACE::equal_to, - class Alloc = STL_NAMESPACE::allocator > -class dense_hash_set { - private: - // Apparently identity is not stl-standard, so we define our own - struct Identity { - Value& operator()(Value& v) const { return v; } - const Value& operator()(const Value& v) const { return v; } - }; - struct SetKey { - void operator()(Value* value, const Value& new_key) const { - *value = new_key; - } - }; - - // The actual data - typedef dense_hashtable ht; - ht rep; - - public: - typedef typename ht::key_type key_type; - typedef typename ht::value_type value_type; - typedef typename ht::hasher hasher; - typedef typename ht::key_equal key_equal; - typedef Alloc allocator_type; - - typedef typename ht::size_type size_type; - typedef typename ht::difference_type difference_type; - typedef typename ht::const_pointer pointer; - typedef typename ht::const_pointer const_pointer; - typedef typename ht::const_reference reference; - typedef typename ht::const_reference const_reference; - - typedef typename ht::const_iterator iterator; - typedef typename ht::const_iterator const_iterator; - typedef typename ht::const_local_iterator local_iterator; - typedef typename ht::const_local_iterator const_local_iterator; - - - // Iterator functions -- recall all iterators are 
const - iterator begin() const { return rep.begin(); } - iterator end() const { return rep.end(); } - - // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. - local_iterator begin(size_type i) const { return rep.begin(i); } - local_iterator end(size_type i) const { return rep.end(i); } - - - // Accessor functions - hasher hash_funct() const { return rep.hash_funct(); } - key_equal key_eq() const { return rep.key_eq(); } - - - // Constructors - explicit dense_hash_set(size_type expected_max_items_in_table = 0, - const hasher& hf = hasher(), - const key_equal& eql = key_equal()) - : rep(expected_max_items_in_table, hf, eql) { } - - template - dense_hash_set(InputIterator f, InputIterator l, - size_type expected_max_items_in_table = 0, - const hasher& hf = hasher(), - const key_equal& eql = key_equal()) - : rep(expected_max_items_in_table, hf, eql) { - rep.insert(f, l); - } - // We use the default copy constructor - // We use the default operator=() - // We use the default destructor - - void clear() { rep.clear(); } - // This clears the hash set without resizing it down to the minimum - // bucket count, but rather keeps the number of buckets constant - void clear_no_resize() { rep.clear_no_resize(); } - void swap(dense_hash_set& hs) { rep.swap(hs.rep); } - - - // Functions concerning size - size_type size() const { return rep.size(); } - size_type max_size() const { return rep.max_size(); } - bool empty() const { return rep.empty(); } - size_type bucket_count() const { return rep.bucket_count(); } - size_type max_bucket_count() const { return rep.max_bucket_count(); } - - // These are tr1 methods. bucket() is the bucket the key is or would be in. 
- size_type bucket_size(size_type i) const { return rep.bucket_size(i); } - size_type bucket(const key_type& key) const { return rep.bucket(key); } - float load_factor() const { - return size() * 1.0f / bucket_count(); - } - float max_load_factor() const { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - return grow; - } - void max_load_factor(float new_grow) { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - rep.set_resizing_parameters(shrink, new_grow); - } - // These aren't tr1 methods but perhaps ought to be. - float min_load_factor() const { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - return shrink; - } - void min_load_factor(float new_shrink) { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - rep.set_resizing_parameters(new_shrink, grow); - } - // Deprecated; use min_load_factor() or max_load_factor() instead. - void set_resizing_parameters(float shrink, float grow) { - return rep.set_resizing_parameters(shrink, grow); - } - - void resize(size_type hint) { rep.resize(hint); } - void rehash(size_type hint) { resize(hint); } // the tr1 name - - // Lookup routines - iterator find(const key_type& key) const { return rep.find(key); } - - size_type count(const key_type& key) const { return rep.count(key); } - - pair equal_range(const key_type& key) const { - return rep.equal_range(key); - } - - // Insertion routines - pair insert(const value_type& obj) { - pair p = rep.insert(obj); - return pair(p.first, p.second); // const to non-const - } - template - void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } - void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } - // required for std::insert_iterator; the passed-in iterator is ignored - iterator insert(iterator, const value_type& obj) { return insert(obj).first; } - - - // Deletion and empty routines - // THESE ARE NON-STANDARD! 
I make you specify an "impossible" key - // value to identify deleted and empty buckets. You can change the - // deleted key as time goes on, or get rid of it entirely to be insert-only. - void set_empty_key(const key_type& key) { rep.set_empty_key(key); } - key_type empty_key() const { return rep.empty_key(); } - - void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } - void clear_deleted_key() { rep.clear_deleted_key(); } - key_type deleted_key() const { return rep.deleted_key(); } - - // These are standard - size_type erase(const key_type& key) { return rep.erase(key); } - void erase(iterator it) { rep.erase(it); } - void erase(iterator f, iterator l) { rep.erase(f, l); } - - - // Comparison - bool operator==(const dense_hash_set& hs) const { return rep == hs.rep; } - bool operator!=(const dense_hash_set& hs) const { return rep != hs.rep; } - - - // I/O -- this is an add-on for writing metainformation to disk - bool write_metadata(FILE *fp) { return rep.write_metadata(fp); } - bool read_metadata(FILE *fp) { return rep.read_metadata(fp); } - bool write_nopointer_data(FILE *fp) { return rep.write_nopointer_data(fp); } - bool read_nopointer_data(FILE *fp) { return rep.read_nopointer_data(fp); } -}; - -template -inline void swap(dense_hash_set& hs1, - dense_hash_set& hs2) { - hs1.swap(hs2); -} - -_END_GOOGLE_NAMESPACE_ - -#endif /* _DENSE_HASH_SET_H_ */ diff --git a/src/sparsehash-1.6/src/google/sparse_hash_map b/src/sparsehash-1.6/src/google/sparse_hash_map deleted file mode 100644 index 82ab2bd..0000000 --- a/src/sparsehash-1.6/src/google/sparse_hash_map +++ /dev/null @@ -1,301 +0,0 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// This is just a very thin wrapper over sparsehashtable.h, just -// like sgi stl's stl_hash_map is a very thin wrapper over -// stl_hashtable. The major thing we define is operator[], because -// we have a concept of a data_type which stl_hashtable doesn't -// (it only has a key and a value). -// -// We adhere mostly to the STL semantics for hash-map. One important -// exception is that insert() may invalidate iterators entirely -- STL -// semantics are that insert() may reorder iterators, but they all -// still refer to something valid in the hashtable. Not so for us. -// Likewise, insert() may invalidate pointers into the hashtable. 
-// (Whether insert invalidates iterators and pointers depends on -// whether it results in a hashtable resize). On the plus side, -// delete() doesn't invalidate iterators or pointers at all, or even -// change the ordering of elements. -// -// Here are a few "power user" tips: -// -// 1) set_deleted_key(): -// Unlike STL's hash_map, if you want to use erase() you -// *must* call set_deleted_key() after construction. -// -// 2) resize(0): -// When an item is deleted, its memory isn't freed right -// away. This is what allows you to iterate over a hashtable -// and call erase() without invalidating the iterator. -// To force the memory to be freed, call resize(0). -// For tr1 compatibility, this can also be called as rehash(0). -// -// 3) min_load_factor(0.0) -// Setting the minimum load factor to 0.0 guarantees that -// the hash table will never shrink. -// -// Roughly speaking: -// (1) dense_hash_map: fastest, uses the most memory unless entries are small -// (2) sparse_hash_map: slowest, uses the least memory -// (3) hash_map / unordered_map (STL): in the middle -// -// Typically I use sparse_hash_map when I care about space and/or when -// I need to save the hashtable on disk. I use hash_map otherwise. I -// don't personally use dense_hash_map ever; some people use it for -// small maps with lots of lookups. -// -// - dense_hash_map has, typically, about 78% memory overhead (if your -// data takes up X bytes, the hash_map uses .78X more bytes in overhead). -// - sparse_hash_map has about 4 bits overhead per entry. -// - sparse_hash_map can be 3-7 times slower than the others for lookup and, -// especially, inserts. See time_hash_map.cc for details. -// -// See /usr/(local/)?doc/sparsehash-*/sparse_hash_map.html -// for information about how to use this class. 
- -#ifndef _SPARSE_HASH_MAP_H_ -#define _SPARSE_HASH_MAP_H_ - -#include -#include // for FILE * in read()/write() -#include // for the default template args -#include // for equal_to -#include // for alloc<> -#include // for pair<> -#include HASH_FUN_H // defined in config.h -#include - - -_START_GOOGLE_NAMESPACE_ - -using STL_NAMESPACE::pair; - -template , // defined in sparseconfig.h - class EqualKey = STL_NAMESPACE::equal_to, - class Alloc = STL_NAMESPACE::allocator > -class sparse_hash_map { - private: - // Apparently select1st is not stl-standard, so we define our own - struct SelectKey { - const Key& operator()(const pair& p) const { - return p.first; - } - }; - struct SetKey { - void operator()(pair* value, const Key& new_key) const { - *const_cast(&value->first) = new_key; - // It would be nice to clear the rest of value here as well, in - // case it's taking up a lot of memory. We do this by clearing - // the value. This assumes T has a zero-arg constructor! - value->second = T(); - } - }; - - // The actual data - typedef sparse_hashtable, Key, HashFcn, - SelectKey, SetKey, EqualKey, Alloc> ht; - ht rep; - - public: - typedef typename ht::key_type key_type; - typedef T data_type; - typedef T mapped_type; - typedef typename ht::value_type value_type; - typedef typename ht::hasher hasher; - typedef typename ht::key_equal key_equal; - typedef Alloc allocator_type; - - typedef typename ht::size_type size_type; - typedef typename ht::difference_type difference_type; - typedef typename ht::pointer pointer; - typedef typename ht::const_pointer const_pointer; - typedef typename ht::reference reference; - typedef typename ht::const_reference const_reference; - - typedef typename ht::iterator iterator; - typedef typename ht::const_iterator const_iterator; - typedef typename ht::local_iterator local_iterator; - typedef typename ht::const_local_iterator const_local_iterator; - - // Iterator functions - iterator begin() { return rep.begin(); } - iterator end() { return 
rep.end(); } - const_iterator begin() const { return rep.begin(); } - const_iterator end() const { return rep.end(); } - - // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. - local_iterator begin(size_type i) { return rep.begin(i); } - local_iterator end(size_type i) { return rep.end(i); } - const_local_iterator begin(size_type i) const { return rep.begin(i); } - const_local_iterator end(size_type i) const { return rep.end(i); } - - // Accessor functions - // TODO(csilvers): implement Alloc get_allocator() const; - hasher hash_funct() const { return rep.hash_funct(); } - hasher hash_function() const { return hash_funct(); } - key_equal key_eq() const { return rep.key_eq(); } - - - // Constructors - explicit sparse_hash_map(size_type expected_max_items_in_table = 0, - const hasher& hf = hasher(), - const key_equal& eql = key_equal()) - : rep(expected_max_items_in_table, hf, eql) { } - - template - sparse_hash_map(InputIterator f, InputIterator l, - size_type expected_max_items_in_table = 0, - const hasher& hf = hasher(), - const key_equal& eql = key_equal()) - : rep(expected_max_items_in_table, hf, eql) { - rep.insert(f, l); - } - // We use the default copy constructor - // We use the default operator=() - // We use the default destructor - - void clear() { rep.clear(); } - void swap(sparse_hash_map& hs) { rep.swap(hs.rep); } - - - // Functions concerning size - size_type size() const { return rep.size(); } - size_type max_size() const { return rep.max_size(); } - bool empty() const { return rep.empty(); } - size_type bucket_count() const { return rep.bucket_count(); } - size_type max_bucket_count() const { return rep.max_bucket_count(); } - - // These are tr1 methods. bucket() is the bucket the key is or would be in. 
- size_type bucket_size(size_type i) const { return rep.bucket_size(i); } - size_type bucket(const key_type& key) const { return rep.bucket(key); } - float load_factor() const { - return size() * 1.0f / bucket_count(); - } - float max_load_factor() const { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - return grow; - } - void max_load_factor(float new_grow) { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - rep.set_resizing_parameters(shrink, new_grow); - } - // These aren't tr1 methods but perhaps ought to be. - float min_load_factor() const { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - return shrink; - } - void min_load_factor(float new_shrink) { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - rep.set_resizing_parameters(new_shrink, grow); - } - // Deprecated; use min_load_factor() or max_load_factor() instead. - void set_resizing_parameters(float shrink, float grow) { - return rep.set_resizing_parameters(shrink, grow); - } - - void resize(size_type hint) { rep.resize(hint); } - void rehash(size_type hint) { resize(hint); } // the tr1 name - - // Lookup routines - iterator find(const key_type& key) { return rep.find(key); } - const_iterator find(const key_type& key) const { return rep.find(key); } - - data_type& operator[](const key_type& key) { // This is our value-add! 
- iterator it = find(key); - if (it != end()) { - return it->second; - } else { - return insert(value_type(key, data_type())).first->second; - } - } - - size_type count(const key_type& key) const { return rep.count(key); } - - pair equal_range(const key_type& key) { - return rep.equal_range(key); - } - pair equal_range(const key_type& key) const { - return rep.equal_range(key); - } - - // Insertion routines - pair insert(const value_type& obj) { return rep.insert(obj); } - template - void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } - void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } - // required for std::insert_iterator; the passed-in iterator is ignored - iterator insert(iterator, const value_type& obj) { return insert(obj).first; } - - - // Deletion routines - // THESE ARE NON-STANDARD! I make you specify an "impossible" key - // value to identify deleted buckets. You can change the key as - // time goes on, or get rid of it entirely to be insert-only. 
- void set_deleted_key(const key_type& key) { - rep.set_deleted_key(key); - } - void clear_deleted_key() { rep.clear_deleted_key(); } - key_type deleted_key() const { return rep.deleted_key(); } - - // These are standard - size_type erase(const key_type& key) { return rep.erase(key); } - void erase(iterator it) { rep.erase(it); } - void erase(iterator f, iterator l) { rep.erase(f, l); } - - - // Comparison - bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; } - bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; } - - - // I/O -- this is an add-on for writing metainformation to disk - bool write_metadata(FILE *fp) { return rep.write_metadata(fp); } - bool read_metadata(FILE *fp) { return rep.read_metadata(fp); } - bool write_nopointer_data(FILE *fp) { return rep.write_nopointer_data(fp); } - bool read_nopointer_data(FILE *fp) { return rep.read_nopointer_data(fp); } -}; - -// We need a global swap as well -template -inline void swap(sparse_hash_map& hm1, - sparse_hash_map& hm2) { - hm1.swap(hm2); -} - -_END_GOOGLE_NAMESPACE_ - -#endif /* _SPARSE_HASH_MAP_H_ */ diff --git a/src/sparsehash-1.6/src/google/sparse_hash_set b/src/sparsehash-1.6/src/google/sparse_hash_set deleted file mode 100644 index 17b4ad5..0000000 --- a/src/sparsehash-1.6/src/google/sparse_hash_set +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// This is just a very thin wrapper over sparsehashtable.h, just -// like sgi stl's stl_hash_set is a very thin wrapper over -// stl_hashtable. The major thing we define is operator[], because -// we have a concept of a data_type which stl_hashtable doesn't -// (it only has a key and a value). -// -// This is more different from sparse_hash_map than you might think, -// because all iterators for sets are const (you obviously can't -// change the key, and for sets there is no value). -// -// We adhere mostly to the STL semantics for hash-map. One important -// exception is that insert() may invalidate iterators entirely -- STL -// semantics are that insert() may reorder iterators, but they all -// still refer to something valid in the hashtable. Not so for us. -// Likewise, insert() may invalidate pointers into the hashtable. -// (Whether insert invalidates iterators and pointers depends on -// whether it results in a hashtable resize). 
On the plus side, -// delete() doesn't invalidate iterators or pointers at all, or even -// change the ordering of elements. -// -// Here are a few "power user" tips: -// -// 1) set_deleted_key(): -// Unlike STL's hash_map, if you want to use erase() you -// *must* call set_deleted_key() after construction. -// -// 2) resize(0): -// When an item is deleted, its memory isn't freed right -// away. This allows you to iterate over a hashtable, -// and call erase(), without invalidating the iterator. -// To force the memory to be freed, call resize(0). -// For tr1 compatibility, this can also be called as rehash(0). -// -// 3) min_load_factor(0.0) -// Setting the minimum load factor to 0.0 guarantees that -// the hash table will never shrink. -// -// Roughly speaking: -// (1) dense_hash_set: fastest, uses the most memory unless entries are small -// (2) sparse_hash_set: slowest, uses the least memory -// (3) hash_set / unordered_set (STL): in the middle -// -// Typically I use sparse_hash_set when I care about space and/or when -// I need to save the hashtable on disk. I use hash_set otherwise. I -// don't personally use dense_hash_set ever; some people use it for -// small sets with lots of lookups. -// -// - dense_hash_set has, typically, about 78% memory overhead (if your -// data takes up X bytes, the hash_set uses .78X more bytes in overhead). -// - sparse_hash_set has about 4 bits overhead per entry. -// - sparse_hash_set can be 3-7 times slower than the others for lookup and, -// especially, inserts. See time_hash_map.cc for details. -// -// See /usr/(local/)?doc/sparsehash-*/sparse_hash_set.html -// for information about how to use this class. 
- -#ifndef _SPARSE_HASH_SET_H_ -#define _SPARSE_HASH_SET_H_ - -#include -#include // for FILE * in read()/write() -#include // for the default template args -#include // for equal_to -#include // for alloc<> -#include // for pair<> -#include HASH_FUN_H // defined in config.h -#include - -_START_GOOGLE_NAMESPACE_ - -using STL_NAMESPACE::pair; - -template , // defined in sparseconfig.h - class EqualKey = STL_NAMESPACE::equal_to, - class Alloc = STL_NAMESPACE::allocator > -class sparse_hash_set { - private: - // Apparently identity is not stl-standard, so we define our own - struct Identity { - Value& operator()(Value& v) const { return v; } - const Value& operator()(const Value& v) const { return v; } - }; - struct SetKey { - void operator()(Value* value, const Value& new_key) const { - *value = new_key; - } - }; - - // The actual data - typedef sparse_hashtable ht; - ht rep; - - public: - typedef typename ht::key_type key_type; - typedef typename ht::value_type value_type; - typedef typename ht::hasher hasher; - typedef typename ht::key_equal key_equal; - typedef Alloc allocator_type; - - typedef typename ht::size_type size_type; - typedef typename ht::difference_type difference_type; - typedef typename ht::const_pointer pointer; - typedef typename ht::const_pointer const_pointer; - typedef typename ht::const_reference reference; - typedef typename ht::const_reference const_reference; - - typedef typename ht::const_iterator iterator; - typedef typename ht::const_iterator const_iterator; - typedef typename ht::const_local_iterator local_iterator; - typedef typename ht::const_local_iterator const_local_iterator; - - - // Iterator functions -- recall all iterators are const - iterator begin() const { return rep.begin(); } - iterator end() const { return rep.end(); } - - // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. 
- local_iterator begin(size_type i) const { return rep.begin(i); } - local_iterator end(size_type i) const { return rep.end(i); } - - - // Accessor functions - // TODO(csilvers): implement Alloc get_allocator() const; - hasher hash_funct() const { return rep.hash_funct(); } - hasher hash_function() const { return hash_funct(); } // tr1 name - key_equal key_eq() const { return rep.key_eq(); } - - - // Constructors - explicit sparse_hash_set(size_type expected_max_items_in_table = 0, - const hasher& hf = hasher(), - const key_equal& eql = key_equal()) - : rep(expected_max_items_in_table, hf, eql) { } - - template - sparse_hash_set(InputIterator f, InputIterator l, - size_type expected_max_items_in_table = 0, - const hasher& hf = hasher(), - const key_equal& eql = key_equal()) - : rep(expected_max_items_in_table, hf, eql) { - rep.insert(f, l); - } - // We use the default copy constructor - // We use the default operator=() - // We use the default destructor - - void clear() { rep.clear(); } - void swap(sparse_hash_set& hs) { rep.swap(hs.rep); } - - - // Functions concerning size - size_type size() const { return rep.size(); } - size_type max_size() const { return rep.max_size(); } - bool empty() const { return rep.empty(); } - size_type bucket_count() const { return rep.bucket_count(); } - size_type max_bucket_count() const { return rep.max_bucket_count(); } - - // These are tr1 methods. bucket() is the bucket the key is or would be in. 
- size_type bucket_size(size_type i) const { return rep.bucket_size(i); } - size_type bucket(const key_type& key) const { return rep.bucket(key); } - float load_factor() const { - return size() * 1.0f / bucket_count(); - } - float max_load_factor() const { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - return grow; - } - void max_load_factor(float new_grow) { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - rep.set_resizing_parameters(shrink, new_grow); - } - // These aren't tr1 methods but perhaps ought to be. - float min_load_factor() const { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - return shrink; - } - void min_load_factor(float new_shrink) { - float shrink, grow; - rep.get_resizing_parameters(&shrink, &grow); - rep.set_resizing_parameters(new_shrink, grow); - } - // Deprecated; use min_load_factor() or max_load_factor() instead. - void set_resizing_parameters(float shrink, float grow) { - return rep.set_resizing_parameters(shrink, grow); - } - - void resize(size_type hint) { rep.resize(hint); } - void rehash(size_type hint) { resize(hint); } // the tr1 name - - // Lookup routines - iterator find(const key_type& key) const { return rep.find(key); } - - size_type count(const key_type& key) const { return rep.count(key); } - - pair equal_range(const key_type& key) const { - return rep.equal_range(key); - } - - // Insertion routines - pair insert(const value_type& obj) { - pair p = rep.insert(obj); - return pair(p.first, p.second); // const to non-const - } - template - void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } - void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } - // required for std::insert_iterator; the passed-in iterator is ignored - iterator insert(iterator, const value_type& obj) { return insert(obj).first; } - - - // Deletion routines - // THESE ARE NON-STANDARD! 
I make you specify an "impossible" key - // value to identify deleted buckets. You can change the key as - // time goes on, or get rid of it entirely to be insert-only. - void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } - void clear_deleted_key() { rep.clear_deleted_key(); } - key_type deleted_key() const { return rep.deleted_key(); } - - // These are standard - size_type erase(const key_type& key) { return rep.erase(key); } - void erase(iterator it) { rep.erase(it); } - void erase(iterator f, iterator l) { rep.erase(f, l); } - - - // Comparison - bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; } - bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; } - - - // I/O -- this is an add-on for writing metainformation to disk - bool write_metadata(FILE *fp) { return rep.write_metadata(fp); } - bool read_metadata(FILE *fp) { return rep.read_metadata(fp); } - bool write_nopointer_data(FILE *fp) { return rep.write_nopointer_data(fp); } - bool read_nopointer_data(FILE *fp) { return rep.read_nopointer_data(fp); } -}; - -template -inline void swap(sparse_hash_set& hs1, - sparse_hash_set& hs2) { - hs1.swap(hs2); -} - -_END_GOOGLE_NAMESPACE_ - -#endif /* _SPARSE_HASH_SET_H_ */ diff --git a/src/sparsehash-1.6/src/google/sparsehash/densehashtable.h b/src/sparsehash-1.6/src/google/sparsehash/densehashtable.h deleted file mode 100644 index 50d9b92..0000000 --- a/src/sparsehash-1.6/src/google/sparsehash/densehashtable.h +++ /dev/null @@ -1,1158 +0,0 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// A dense hashtable is a particular implementation of -// a hashtable: one that is meant to minimize memory allocation. -// It does this by using an array to store all the data. We -// steal a value from the key space to indicate "empty" array -// elements (ie indices where no item lives) and another to indicate -// "deleted" elements. -// -// (Note it is possible to change the value of the delete key -// on the fly; you can even remove it, though after that point -// the hashtable is insert_only until you set it again. The empty -// value however can't be changed.) 
-// -// To minimize allocation and pointer overhead, we use internal -// probing, in which the hashtable is a single table, and collisions -// are resolved by trying to insert again in another bucket. The -// most cache-efficient internal probing schemes are linear probing -// (which suffers, alas, from clumping) and quadratic probing, which -// is what we implement by default. -// -// Type requirements: value_type is required to be Copy Constructible -// and Default Constructible. It is not required to be (and commonly -// isn't) Assignable. -// -// You probably shouldn't use this code directly. Use -// or instead. - -// You can change the following below: -// HT_OCCUPANCY_FLT -- how full before we double size -// HT_EMPTY_FLT -- how empty before we halve size -// HT_MIN_BUCKETS -- default smallest bucket size -// -// You can also change enlarge_resize_percent (which defaults to -// HT_OCCUPANCY_FLT), and shrink_resize_percent (which defaults to -// HT_EMPTY_FLT) with set_resizing_parameters(). -// -// How to decide what values to use? -// shrink_resize_percent's default of .4 * OCCUPANCY_FLT, is probably good. -// HT_MIN_BUCKETS is probably unnecessary since you can specify -// (indirectly) the starting number of buckets at construct-time. -// For enlarge_resize_percent, you can use this chart to try to trade-off -// expected lookup time to the space taken up. By default, this -// code uses quadratic probing, though you can change it to linear -// via _JUMP below if you really want to. -// -// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html -// NUMBER OF PROBES / LOOKUP Successful Unsuccessful -// Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L) -// Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)2]/2 -// -// -- enlarge_resize_percent -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99 -// QUADRATIC COLLISION RES. 
-// probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11 -// probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6 -// LINEAR COLLISION RES. -// probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5 -// probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0 - -#ifndef _DENSEHASHTABLE_H_ -#define _DENSEHASHTABLE_H_ - -// The probing method -// Linear probing -// #define JUMP_(key, num_probes) ( 1 ) -// Quadratic-ish probing -#define JUMP_(key, num_probes) ( num_probes ) - - -#include -#include -#include -#include // for abort() -#include // For swap(), eg -#include // For length_error -#include // For cerr -#include // For uninitialized_fill, uninitialized_copy -#include // for pair<> -#include // for facts about iterator tags -#include // for true_type, integral_constant, etc. - -_START_GOOGLE_NAMESPACE_ - -using STL_NAMESPACE::pair; - -// Hashtable class, used to implement the hashed associative containers -// hash_set and hash_map. - -// Value: what is stored in the table (each bucket is a Value). -// Key: something in a 1-to-1 correspondence to a Value, that can be used -// to search for a Value in the table (find() takes a Key). -// HashFcn: Takes a Key and returns an integer, the more unique the better. -// ExtractKey: given a Value, returns the unique Key associated with it. -// SetKey: given a Value* and a Key, modifies the value such that -// ExtractKey(value) == key. We guarantee this is only called -// with key == deleted_key or key == empty_key. -// EqualKey: Given two Keys, says whether they are the same (that is, -// if they are both associated with the same Value). -// Alloc: STL allocator to use to allocate memory. Currently ignored. 
- -template -class dense_hashtable; - -template -struct dense_hashtable_iterator; - -template -struct dense_hashtable_const_iterator; - -// We're just an array, but we need to skip over empty and deleted elements -template -struct dense_hashtable_iterator { - public: - typedef dense_hashtable_iterator iterator; - typedef dense_hashtable_const_iterator const_iterator; - - typedef STL_NAMESPACE::forward_iterator_tag iterator_category; - typedef V value_type; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef V& reference; // Value - typedef V* pointer; - - // "Real" constructor and default constructor - dense_hashtable_iterator(const dense_hashtable *h, - pointer it, pointer it_end, bool advance) - : ht(h), pos(it), end(it_end) { - if (advance) advance_past_empty_and_deleted(); - } - dense_hashtable_iterator() { } - // The default destructor is fine; we don't define one - // The default operator= is fine; we don't define one - - // Happy dereferencer - reference operator*() const { return *pos; } - pointer operator->() const { return &(operator*()); } - - // Arithmetic. The only hard part is making sure that - // we're not on an empty or marked-deleted array element - void advance_past_empty_and_deleted() { - while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) ) - ++pos; - } - iterator& operator++() { - assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this; - } - iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } - - // Comparison. - bool operator==(const iterator& it) const { return pos == it.pos; } - bool operator!=(const iterator& it) const { return pos != it.pos; } - - - // The actual data - const dense_hashtable *ht; - pointer pos, end; -}; - - -// Now do it all again, but with const-ness! 
-template -struct dense_hashtable_const_iterator { - public: - typedef dense_hashtable_iterator iterator; - typedef dense_hashtable_const_iterator const_iterator; - - typedef STL_NAMESPACE::forward_iterator_tag iterator_category; - typedef V value_type; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef const V& reference; // Value - typedef const V* pointer; - - // "Real" constructor and default constructor - dense_hashtable_const_iterator( - const dense_hashtable *h, - pointer it, pointer it_end, bool advance) - : ht(h), pos(it), end(it_end) { - if (advance) advance_past_empty_and_deleted(); - } - dense_hashtable_const_iterator() { } - // This lets us convert regular iterators to const iterators - dense_hashtable_const_iterator(const iterator &it) - : ht(it.ht), pos(it.pos), end(it.end) { } - // The default destructor is fine; we don't define one - // The default operator= is fine; we don't define one - - // Happy dereferencer - reference operator*() const { return *pos; } - pointer operator->() const { return &(operator*()); } - - // Arithmetic. The only hard part is making sure that - // we're not on an empty or marked-deleted array element - void advance_past_empty_and_deleted() { - while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) ) - ++pos; - } - const_iterator& operator++() { - assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this; - } - const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; } - - // Comparison. 
- bool operator==(const const_iterator& it) const { return pos == it.pos; } - bool operator!=(const const_iterator& it) const { return pos != it.pos; } - - - // The actual data - const dense_hashtable *ht; - pointer pos, end; -}; - -template -class dense_hashtable { - public: - typedef Key key_type; - typedef Value value_type; - typedef HashFcn hasher; - typedef EqualKey key_equal; - - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef dense_hashtable_iterator - iterator; - - typedef dense_hashtable_const_iterator - const_iterator; - - // These come from tr1. For us they're the same as regular iterators. - typedef iterator local_iterator; - typedef const_iterator const_local_iterator; - - // How full we let the table get before we resize, by default. - // Knuth says .8 is good -- higher causes us to probe too much, - // though it saves memory. - static const float HT_OCCUPANCY_FLT; // = 0.5; - - // How empty we let the table get before we resize lower, by default. - // (0.0 means never resize lower.) - // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing - static const float HT_EMPTY_FLT; // = 0.4 * HT_OCCUPANCY_FLT - - // Minimum size we're willing to let hashtables be. - // Must be a power of two, and at least 4. - // Note, however, that for a given hashtable, the initial size is a - // function of the first constructor arg, and may be >HT_MIN_BUCKETS. - static const size_t HT_MIN_BUCKETS = 4; - - // By default, if you don't specify a hashtable size at - // construction-time, we use this size. Must be a power of two, and - // at least HT_MIN_BUCKETS. 
- static const size_t HT_DEFAULT_STARTING_BUCKETS = 32; - - - // ITERATOR FUNCTIONS - iterator begin() { return iterator(this, table, - table + num_buckets, true); } - iterator end() { return iterator(this, table + num_buckets, - table + num_buckets, true); } - const_iterator begin() const { return const_iterator(this, table, - table+num_buckets,true);} - const_iterator end() const { return const_iterator(this, table + num_buckets, - table+num_buckets,true);} - - // These come from tr1 unordered_map. They iterate over 'bucket' n. - // For sparsehashtable, we could consider each 'group' to be a bucket, - // I guess, but I don't really see the point. We'll just consider - // bucket n to be the n-th element of the sparsetable, if it's occupied, - // or some empty element, otherwise. - local_iterator begin(size_type i) { - return local_iterator(this, table + i, table + i+1, false); - } - local_iterator end(size_type i) { - local_iterator it = begin(i); - if (!test_empty(i) && !test_deleted(i)) - ++it; - return it; - } - const_local_iterator begin(size_type i) const { - return const_local_iterator(this, table + i, table + i+1, false); - } - const_local_iterator end(size_type i) const { - const_local_iterator it = begin(i); - if (!test_empty(i) && !test_deleted(i)) - ++it; - return it; - } - - // ACCESSOR FUNCTIONS for the things we templatize on, basically - hasher hash_funct() const { return hash; } - key_equal key_eq() const { return equals; } - - // Accessor function for statistics gathering. - int num_table_copies() const { return num_ht_copies; } - - private: - // Annoyingly, we can't copy values around, because they might have - // const components (they're probably pair). We use - // explicit destructor invocation and placement new to get around - // this. Arg. 
- void set_value(value_type* dst, const value_type& src) { - dst->~value_type(); - new(dst) value_type(src); - } - - void destroy_buckets(size_type first, size_type last) { - for ( ; first != last; ++first) - table[first].~value_type(); - } - - // DELETE HELPER FUNCTIONS - // This lets the user describe a key that will indicate deleted - // table entries. This key should be an "impossible" entry -- - // if you try to insert it for real, you won't be able to retrieve it! - // (NB: while you pass in an entire value, only the key part is looked - // at. This is just because I don't know how to assign just a key.) - private: - void squash_deleted() { // gets rid of any deleted entries we have - if ( num_deleted ) { // get rid of deleted before writing - dense_hashtable tmp(*this); // copying will get rid of deleted - swap(tmp); // now we are tmp - } - assert(num_deleted == 0); - } - - bool test_deleted_key(const key_type& key) const { - // The num_deleted test is crucial for read(): after read(), the ht values - // are garbage, and we don't want to think some of them are deleted. - // Invariant: !use_deleted implies num_deleted is 0. 
- assert(use_deleted || num_deleted == 0); - return num_deleted > 0 && equals(delkey, key); - } - - public: - void set_deleted_key(const key_type &key) { - // the empty indicator (if specified) and the deleted indicator - // must be different - assert(!use_empty || !equals(key, get_key(emptyval))); - // It's only safe to change what "deleted" means if we purge deleted guys - squash_deleted(); - use_deleted = true; - delkey = key; - } - void clear_deleted_key() { - squash_deleted(); - use_deleted = false; - } - key_type deleted_key() const { - assert(use_deleted); - return delkey; - } - - // These are public so the iterators can use them - // True if the item at position bucknum is "deleted" marker - bool test_deleted(size_type bucknum) const { - return test_deleted_key(get_key(table[bucknum])); - } - bool test_deleted(const iterator &it) const { - return test_deleted_key(get_key(*it)); - } - bool test_deleted(const const_iterator &it) const { - return test_deleted_key(get_key(*it)); - } - - // Set it so test_deleted is true. true if object didn't used to be deleted. - bool set_deleted(iterator &it) { - assert(use_deleted); // bad if set_deleted_key() wasn't called - bool retval = !test_deleted(it); - // &* converts from iterator to value-type. - set_key(&(*it), delkey); - return retval; - } - // Set it so test_deleted is false. true if object used to be deleted - bool clear_deleted(iterator &it) { - assert(use_deleted); // bad if set_deleted_key() wasn't called - // Happens automatically when we assign something else in its place. - return test_deleted(it); - } - - // We also allow to set/clear the deleted bit on a const iterator. - // We allow a const_iterator for the same reason you can delete a - // const pointer: it's convenient, and semantically you can't use - // 'it' after it's been deleted anyway, so its const-ness doesn't - // really matter. 
- bool set_deleted(const_iterator &it) { - assert(use_deleted); // bad if set_deleted_key() wasn't called - bool retval = !test_deleted(it); - set_key(const_cast(&(*it)), delkey); - return retval; - } - // Set it so test_deleted is false. true if object used to be deleted - bool clear_deleted(const_iterator &it) { - assert(use_deleted); // bad if set_deleted_key() wasn't called - return test_deleted(it); - } - - // EMPTY HELPER FUNCTIONS - // This lets the user describe a key that will indicate empty (unused) - // table entries. This key should be an "impossible" entry -- - // if you try to insert it for real, you won't be able to retrieve it! - // (NB: while you pass in an entire value, only the key part is looked - // at. This is just because I don't know how to assign just a key.) - public: - // These are public so the iterators can use them - // True if the item at position bucknum is "empty" marker - bool test_empty(size_type bucknum) const { - assert(use_empty); // we always need to know what's empty! - return equals(get_key(emptyval), get_key(table[bucknum])); - } - bool test_empty(const iterator &it) const { - assert(use_empty); // we always need to know what's empty! - return equals(get_key(emptyval), get_key(*it)); - } - bool test_empty(const const_iterator &it) const { - assert(use_empty); // we always need to know what's empty! 
- return equals(get_key(emptyval), get_key(*it)); - } - - private: - // You can either set a range empty or an individual element - void set_empty(size_type bucknum) { - assert(use_empty); - set_value(&table[bucknum], emptyval); - } - void fill_range_with_empty(value_type* table_start, value_type* table_end) { - // Like set_empty(range), but doesn't destroy previous contents - STL_NAMESPACE::uninitialized_fill(table_start, table_end, emptyval); - } - void set_empty(size_type buckstart, size_type buckend) { - assert(use_empty); - destroy_buckets(buckstart, buckend); - fill_range_with_empty(table + buckstart, table + buckend); - } - - public: - // TODO(csilvers): change all callers of this to pass in a key instead, - // and take a const key_type instead of const value_type. - void set_empty_key(const value_type &val) { - // Once you set the empty key, you can't change it - assert(!use_empty); - // The deleted indicator (if specified) and the empty indicator - // must be different. - assert(!use_deleted || !equals(get_key(val), delkey)); - use_empty = true; - set_value(&emptyval, val); - - assert(!table); // must set before first use - // num_buckets was set in constructor even though table was NULL - table = (value_type *) malloc(num_buckets * sizeof(*table)); - assert(table); - fill_range_with_empty(table, table + num_buckets); - } - // TODO(sjackman): return a key_type rather than a value_type - value_type empty_key() const { - assert(use_empty); - return emptyval; - } - - // FUNCTIONS CONCERNING SIZE - public: - size_type size() const { return num_elements - num_deleted; } - // Buckets are always a power of 2 - size_type max_size() const { return (size_type(-1) >> 1U) + 1; } - bool empty() const { return size() == 0; } - size_type bucket_count() const { return num_buckets; } - size_type max_bucket_count() const { return max_size(); } - size_type nonempty_bucket_count() const { return num_elements; } - // These are tr1 methods. 
Their idea of 'bucket' doesn't map well to - // what we do. We just say every bucket has 0 or 1 items in it. - size_type bucket_size(size_type i) const { - return begin(i) == end(i) ? 0 : 1; - } - - - - private: - // Because of the above, size_type(-1) is never legal; use it for errors - static const size_type ILLEGAL_BUCKET = size_type(-1); - - private: - // This is the smallest size a hashtable can be without being too crowded - // If you like, you can give a min #buckets as well as a min #elts - size_type min_size(size_type num_elts, size_type min_buckets_wanted) { - size_type sz = HT_MIN_BUCKETS; // min buckets allowed - while ( sz < min_buckets_wanted || - num_elts >= static_cast(sz * enlarge_resize_percent) ) { - if (sz * 2 < sz) - throw std::length_error("resize overflow"); // protect against overflow - sz *= 2; - } - return sz; - } - - // Used after a string of deletes - void maybe_shrink() { - assert(num_elements >= num_deleted); - assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two - assert(bucket_count() >= HT_MIN_BUCKETS); - - // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS, - // we'll never shrink until you get relatively big, and we'll never - // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something - // like "dense_hash_set x; x.insert(4); x.erase(4);" will - // shrink us down to HT_MIN_BUCKETS buckets, which is too small. - if (shrink_threshold > 0 && - (num_elements-num_deleted) < shrink_threshold && - bucket_count() > HT_DEFAULT_STARTING_BUCKETS ) { - size_type sz = bucket_count() / 2; // find how much we should shrink - while ( sz > HT_DEFAULT_STARTING_BUCKETS && - (num_elements - num_deleted) < sz * shrink_resize_percent ) - sz /= 2; // stay a power of 2 - dense_hashtable tmp(*this, sz); // Do the actual resizing - swap(tmp); // now we are tmp - } - consider_shrink = false; // because we just considered it - } - - // We'll let you resize a hashtable -- though this makes us copy all! 
- // When you resize, you say, "make it big enough for this many more elements" - void resize_delta(size_type delta) { - if ( consider_shrink ) // see if lots of deletes happened - maybe_shrink(); - if ( bucket_count() > HT_MIN_BUCKETS && - (num_elements + delta) <= enlarge_threshold ) - return; // we're ok as we are - - // Sometimes, we need to resize just to get rid of all the - // "deleted" buckets that are clogging up the hashtable. So when - // deciding whether to resize, count the deleted buckets (which - // are currently taking up room). But later, when we decide what - // size to resize to, *don't* count deleted buckets, since they - // get discarded during the resize. - const size_type needed_size = min_size(num_elements + delta, 0); - if ( needed_size > bucket_count() ) { // we don't have enough buckets - size_type resize_to = min_size(num_elements - num_deleted + delta, - bucket_count()); - if (resize_to < needed_size) { - // This situation means that we have enough deleted elements, - // that once we purge them, we won't actually have needed to - // grow. But we may want to grow anyway: if we just purge one - // element, say, we'll have to grow anyway next time we - // insert. Might as well grow now, since we're already going - // through the trouble of copying (in order to purge the - // deleted elements). - if (num_elements - num_deleted + delta >= - static_cast(resize_to*2 * shrink_resize_percent)) { - // Good, we won't be below the shrink threshhold even if we double. - resize_to *= 2; - } - } - dense_hashtable tmp(*this, resize_to); - swap(tmp); // now we are tmp - } - } - - // Increase number of buckets, assuming value_type has trivial copy - // constructor and destructor. (Really, we want it to have "trivial - // move", because that's what realloc does. But there's no way to - // capture that using type_traits, so we pretend that move(x, y) is - // equivalent to "x.~T(); new(x) T(y);" which is pretty much - // correct, if a bit conservative.) 
- void expand_array(size_t resize_to, true_type) { - table = (value_type *) realloc(table, resize_to * sizeof(value_type)); - assert(table); - fill_range_with_empty(table + num_buckets, table + resize_to); - } - - // Increase number of buckets, without special assumptions about value_type. - // TODO(austern): make this exception safe. Handle exceptions from - // value_type's copy constructor. - void expand_array(size_t resize_to, false_type) { - value_type* new_table = - (value_type *) malloc(resize_to * sizeof(value_type)); - assert(new_table); - STL_NAMESPACE::uninitialized_copy(table, table + num_buckets, new_table); - fill_range_with_empty(new_table + num_buckets, new_table + resize_to); - destroy_buckets(0, num_buckets); - free(table); - table = new_table; - } - - // Used to actually do the rehashing when we grow/shrink a hashtable - void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted) { - clear(); // clear table, set num_deleted to 0 - - // If we need to change the size of our table, do it now - const size_type resize_to = min_size(ht.size(), min_buckets_wanted); - if ( resize_to > bucket_count() ) { // we don't have enough buckets - typedef integral_constant::value && - has_trivial_destructor::value)> - realloc_ok; // we pretend mv(x,y) == "x.~T(); new(x) T(y)" - expand_array(resize_to, realloc_ok()); - num_buckets = resize_to; - reset_thresholds(); - } - - // We use a normal iterator to get non-deleted bcks from ht - // We could use insert() here, but since we know there are - // no duplicates and no deleted items, we can be more efficient - assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two - for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) { - size_type num_probes = 0; // how many times we've probed - size_type bucknum; - const size_type bucket_count_minus_one = bucket_count() - 1; - for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; - !test_empty(bucknum); // not empty - bucknum = (bucknum + 
JUMP_(key, num_probes)) & bucket_count_minus_one) { - ++num_probes; - assert(num_probes < bucket_count()); // or else the hashtable is full - } - set_value(&table[bucknum], *it); // copies the value to here - num_elements++; - } - num_ht_copies++; - } - - // Required by the spec for hashed associative container - public: - // Though the docs say this should be num_buckets, I think it's much - // more useful as req_elements. As a special feature, calling with - // req_elements==0 will cause us to shrink if we can, saving space. - void resize(size_type req_elements) { // resize to this or larger - if ( consider_shrink || req_elements == 0 ) - maybe_shrink(); - if ( req_elements > num_elements ) - return resize_delta(req_elements - num_elements); - } - - // Get and change the value of shrink_resize_percent and - // enlarge_resize_percent. The description at the beginning of this - // file explains how to choose the values. Setting the shrink - // parameter to 0.0 ensures that the table never shrinks. - void get_resizing_parameters(float* shrink, float* grow) const { - *shrink = shrink_resize_percent; - *grow = enlarge_resize_percent; - } - void set_resizing_parameters(float shrink, float grow) { - assert(shrink >= 0.0); - assert(grow <= 1.0); - if (shrink > grow/2.0f) - shrink = grow / 2.0f; // otherwise we thrash hashtable size - shrink_resize_percent = shrink; - enlarge_resize_percent = grow; - reset_thresholds(); - } - - // CONSTRUCTORS -- as required by the specs, we take a size, - // but also let you specify a hashfunction, key comparator, - // and key extractor. We also define a copy constructor and =. 
- // DESTRUCTOR -- needs to free the table - explicit dense_hashtable(size_type expected_max_items_in_table = 0, - const HashFcn& hf = HashFcn(), - const EqualKey& eql = EqualKey(), - const ExtractKey& ext = ExtractKey(), - const SetKey& set = SetKey()) - : hash(hf), equals(eql), get_key(ext), set_key(set), num_deleted(0), - use_deleted(false), use_empty(false), - delkey(), emptyval(), enlarge_resize_percent(HT_OCCUPANCY_FLT), - shrink_resize_percent(HT_EMPTY_FLT), table(NULL), - num_buckets(expected_max_items_in_table == 0 - ? HT_DEFAULT_STARTING_BUCKETS - : min_size(expected_max_items_in_table, 0)), - num_elements(0), num_ht_copies(0) { - // table is NULL until emptyval is set. However, we set num_buckets - // here so we know how much space to allocate once emptyval is set - reset_thresholds(); - } - - // As a convenience for resize(), we allow an optional second argument - // which lets you make this new hashtable a different size than ht - dense_hashtable(const dense_hashtable& ht, - size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) - : hash(ht.hash), equals(ht.equals), - get_key(ht.get_key), set_key(ht.set_key), num_deleted(0), - use_deleted(ht.use_deleted), use_empty(ht.use_empty), - delkey(ht.delkey), emptyval(ht.emptyval), - enlarge_resize_percent(ht.enlarge_resize_percent), - shrink_resize_percent(ht.shrink_resize_percent), table(NULL), - num_buckets(0), num_elements(0), num_ht_copies(ht.num_ht_copies) { - if (!ht.use_empty) { - // If use_empty isn't set, copy_from will crash, so we do our own copying. 
- assert(ht.empty()); - num_buckets = min_size(ht.size(), min_buckets_wanted); - reset_thresholds(); - return; - } - reset_thresholds(); - copy_from(ht, min_buckets_wanted); // copy_from() ignores deleted entries - } - - dense_hashtable& operator= (const dense_hashtable& ht) { - if (&ht == this) return *this; // don't copy onto ourselves - if (!ht.use_empty) { - assert(ht.empty()); - dense_hashtable empty_table(ht); // empty table with ht's thresholds - this->swap(empty_table); - return *this; - } - hash = ht.hash; - equals = ht.equals; - get_key = ht.get_key; - set_key = ht.set_key; - use_deleted = ht.use_deleted; - use_empty = ht.use_empty; - delkey = ht.delkey; - set_value(&emptyval, ht.emptyval); - enlarge_resize_percent = ht.enlarge_resize_percent; - shrink_resize_percent = ht.shrink_resize_percent; - copy_from(ht, HT_MIN_BUCKETS); // calls clear and sets num_deleted to 0 too - return *this; - } - - ~dense_hashtable() { - if (table) { - destroy_buckets(0, num_buckets); - free(table); - } - } - - // Many STL algorithms use swap instead of copy constructors - void swap(dense_hashtable& ht) { - STL_NAMESPACE::swap(hash, ht.hash); - STL_NAMESPACE::swap(equals, ht.equals); - STL_NAMESPACE::swap(get_key, ht.get_key); - STL_NAMESPACE::swap(set_key, ht.set_key); - STL_NAMESPACE::swap(num_deleted, ht.num_deleted); - STL_NAMESPACE::swap(use_deleted, ht.use_deleted); - STL_NAMESPACE::swap(use_empty, ht.use_empty); - STL_NAMESPACE::swap(enlarge_resize_percent, ht.enlarge_resize_percent); - STL_NAMESPACE::swap(shrink_resize_percent, ht.shrink_resize_percent); - STL_NAMESPACE::swap(delkey, ht.delkey); - { value_type tmp; // for annoying reasons, swap() doesn't work - set_value(&tmp, emptyval); - set_value(&emptyval, ht.emptyval); - set_value(&ht.emptyval, tmp); - } - STL_NAMESPACE::swap(table, ht.table); - STL_NAMESPACE::swap(num_buckets, ht.num_buckets); - STL_NAMESPACE::swap(num_elements, ht.num_elements); - STL_NAMESPACE::swap(num_ht_copies, ht.num_ht_copies); - 
reset_thresholds(); - ht.reset_thresholds(); - } - - // It's always nice to be able to clear a table without deallocating it - void clear() { - const size_type new_num_buckets = min_size(0,0); - if (num_elements == 0 && - num_deleted == 0 && - new_num_buckets == num_buckets) { - // Table is already empty, and the number of buckets is already as we - // desire, so nothing to do. - return; - } - if (table) - destroy_buckets(0, num_buckets); - if (!table || (new_num_buckets != num_buckets)) { - // Recompute the resize thresholds and realloc the table only if we're - // actually changing its size. - num_buckets = new_num_buckets; // our new size - reset_thresholds(); - table = (value_type *) realloc(table, num_buckets * sizeof(*table)); - } - assert(table); - fill_range_with_empty(table, table + num_buckets); - num_elements = 0; - num_deleted = 0; - } - - // Clear the table without resizing it. - // Mimicks the stl_hashtable's behaviour when clear()-ing in that it - // does not modify the bucket count - void clear_no_resize() { - if (table) { - set_empty(0, num_buckets); - } - // don't consider to shrink before another erase() - reset_thresholds(); - num_elements = 0; - num_deleted = 0; - } - - // LOOKUP ROUTINES - private: - // Returns a pair of positions: 1st where the object is, 2nd where - // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET - // if object is not found; 2nd is ILLEGAL_BUCKET if it is. 
- // Note: because of deletions where-to-insert is not trivial: it's the - // first deleted bucket we see, as long as we don't find the key later - pair find_position(const key_type &key) const { - size_type num_probes = 0; // how many times we've probed - const size_type bucket_count_minus_one = bucket_count() - 1; - size_type bucknum = hash(key) & bucket_count_minus_one; - size_type insert_pos = ILLEGAL_BUCKET; // where we would insert - while ( 1 ) { // probe until something happens - if ( test_empty(bucknum) ) { // bucket is empty - if ( insert_pos == ILLEGAL_BUCKET ) // found no prior place to insert - return pair(ILLEGAL_BUCKET, bucknum); - else - return pair(ILLEGAL_BUCKET, insert_pos); - - } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert - if ( insert_pos == ILLEGAL_BUCKET ) - insert_pos = bucknum; - - } else if ( equals(key, get_key(table[bucknum])) ) { - return pair(bucknum, ILLEGAL_BUCKET); - } - ++num_probes; // we're doing another probe - bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; - assert(num_probes < bucket_count()); // don't probe too many times! - } - } - - public: - iterator find(const key_type& key) { - if ( size() == 0 ) return end(); - pair pos = find_position(key); - if ( pos.first == ILLEGAL_BUCKET ) // alas, not there - return end(); - else - return iterator(this, table + pos.first, table + num_buckets, false); - } - - const_iterator find(const key_type& key) const { - if ( size() == 0 ) return end(); - pair pos = find_position(key); - if ( pos.first == ILLEGAL_BUCKET ) // alas, not there - return end(); - else - return const_iterator(this, table + pos.first, table+num_buckets, false); - } - - // This is a tr1 method: the bucket a given key is in, or what bucket - // it would be put in, if it were to be inserted. Shrug. - size_type bucket(const key_type& key) const { - pair pos = find_position(key); - return pos.first == ILLEGAL_BUCKET ? 
pos.second : pos.first; - } - - // Counts how many elements have key key. For maps, it's either 0 or 1. - size_type count(const key_type &key) const { - pair pos = find_position(key); - return pos.first == ILLEGAL_BUCKET ? 0 : 1; - } - - // Likewise, equal_range doesn't really make sense for us. Oh well. - pair equal_range(const key_type& key) { - iterator pos = find(key); // either an iterator or end - if (pos == end()) { - return pair(pos, pos); - } else { - const iterator startpos = pos++; - return pair(startpos, pos); - } - } - pair equal_range(const key_type& key) const { - const_iterator pos = find(key); // either an iterator or end - if (pos == end()) { - return pair(pos, pos); - } else { - const const_iterator startpos = pos++; - return pair(startpos, pos); - } - } - - - // INSERTION ROUTINES - private: - // If you know *this is big enough to hold obj, use this routine - pair insert_noresize(const value_type& obj) { - // First, double-check we're not inserting delkey or emptyval - assert(!use_empty || !equals(get_key(obj), get_key(emptyval))); - assert(!use_deleted || !equals(get_key(obj), delkey)); - const pair pos = find_position(get_key(obj)); - if ( pos.first != ILLEGAL_BUCKET) { // object was already there - return pair(iterator(this, table + pos.first, - table + num_buckets, false), - false); // false: we didn't insert - } else { // pos.second says where to put it - if ( test_deleted(pos.second) ) { // just replace if it's been del. 
- const_iterator delpos(this, table + pos.second, // shrug: - table + num_buckets, false);// shouldn't need const - clear_deleted(delpos); - assert( num_deleted > 0); - --num_deleted; // used to be, now it isn't - } else { - ++num_elements; // replacing an empty bucket - } - set_value(&table[pos.second], obj); - return pair(iterator(this, table + pos.second, - table + num_buckets, false), - true); // true: we did insert - } - } - - public: - // This is the normal insert routine, used by the outside world - pair insert(const value_type& obj) { - resize_delta(1); // adding an object, grow if need be - return insert_noresize(obj); - } - - // When inserting a lot at a time, we specialize on the type of iterator - template - void insert(InputIterator f, InputIterator l) { - // specializes on iterator type - insert(f, l, typename STL_NAMESPACE::iterator_traits::iterator_category()); - } - - // Iterator supports operator-, resize before inserting - template - void insert(ForwardIterator f, ForwardIterator l, - STL_NAMESPACE::forward_iterator_tag) { - size_type n = STL_NAMESPACE::distance(f, l); // TODO(csilvers): standard? 
- resize_delta(n); - for ( ; n > 0; --n, ++f) - insert_noresize(*f); - } - - // Arbitrary iterator, can't tell how much to resize - template - void insert(InputIterator f, InputIterator l, - STL_NAMESPACE::input_iterator_tag) { - for ( ; f != l; ++f) - insert(*f); - } - - - // DELETION ROUTINES - size_type erase(const key_type& key) { - // First, double-check we're not trying to erase delkey or emptyval - assert(!use_empty || !equals(key, get_key(emptyval))); - assert(!use_deleted || !equals(key, delkey)); - const_iterator pos = find(key); // shrug: shouldn't need to be const - if ( pos != end() ) { - assert(!test_deleted(pos)); // or find() shouldn't have returned it - set_deleted(pos); - ++num_deleted; - consider_shrink = true; // will think about shrink after next insert - return 1; // because we deleted one thing - } else { - return 0; // because we deleted nothing - } - } - - // We return the iterator past the deleted item. - void erase(iterator pos) { - if ( pos == end() ) return; // sanity check - if ( set_deleted(pos) ) { // true if object has been newly deleted - ++num_deleted; - consider_shrink = true; // will think about shrink after next insert - } - } - - void erase(iterator f, iterator l) { - for ( ; f != l; ++f) { - if ( set_deleted(f) ) // should always be true - ++num_deleted; - } - consider_shrink = true; // will think about shrink after next insert - } - - // We allow you to erase a const_iterator just like we allow you to - // erase an iterator. This is in parallel to 'delete': you can delete - // a const pointer just like a non-const pointer. The logic is that - // you can't use the object after it's erased anyway, so it doesn't matter - // if it's const or not. 
- void erase(const_iterator pos) { - if ( pos == end() ) return; // sanity check - if ( set_deleted(pos) ) { // true if object has been newly deleted - ++num_deleted; - consider_shrink = true; // will think about shrink after next insert - } - } - void erase(const_iterator f, const_iterator l) { - for ( ; f != l; ++f) { - if ( set_deleted(f) ) // should always be true - ++num_deleted; - } - consider_shrink = true; // will think about shrink after next insert - } - - - // COMPARISON - bool operator==(const dense_hashtable& ht) const { - if (size() != ht.size()) { - return false; - } else if (this == &ht) { - return true; - } else { - // Iterate through the elements in "this" and see if the - // corresponding element is in ht - for ( const_iterator it = begin(); it != end(); ++it ) { - const_iterator it2 = ht.find(get_key(*it)); - if ((it2 == ht.end()) || (*it != *it2)) { - return false; - } - } - return true; - } - } - bool operator!=(const dense_hashtable& ht) const { - return !(*this == ht); - } - - - // I/O - // We support reading and writing hashtables to disk. Alas, since - // I don't know how to write a hasher or key_equal, you have to make - // sure everything but the table is the same. We compact before writing - // - // NOTE: These functions are currently TODO. They've not been implemented. 
- bool write_metadata(FILE *fp) { - squash_deleted(); // so we don't have to worry about delkey - return false; // TODO - } - - bool read_metadata(FILE *fp) { - num_deleted = 0; // since we got rid before writing - assert(use_empty); // have to set this before calling us - if (table) free(table); // we'll make our own - // TODO: read magic number - // TODO: read num_buckets - reset_thresholds(); - table = (value_type *) malloc(num_buckets * sizeof(*table)); - assert(table); - fill_range_with_empty(table, table + num_buckets); - // TODO: read num_elements - for ( size_type i = 0; i < num_elements; ++i ) { - // TODO: read bucket_num - // TODO: set with non-empty, non-deleted value - } - return false; // TODO - } - - // If your keys and values are simple enough, we can write them to - // disk for you. "simple enough" means value_type is a POD type - // that contains no pointers. However, we don't try to normalize - // endianness - bool write_nopointer_data(FILE *fp) const { - for ( const_iterator it = begin(); it != end(); ++it ) { - // TODO: skip empty/deleted values - if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false; - } - return false; - } - - // When reading, we have to override the potential const-ness of *it - bool read_nopointer_data(FILE *fp) { - for ( iterator it = begin(); it != end(); ++it ) { - // TODO: skip empty/deleted values - if ( !fread(reinterpret_cast(&(*it)), sizeof(*it), 1, fp) ) - return false; - } - return false; - } - - private: - // The actual data - hasher hash; // required by hashed_associative_container - key_equal equals; - ExtractKey get_key; - SetKey set_key; - size_type num_deleted; // how many occupied buckets are marked deleted - bool use_deleted; // false until delkey has been set - bool use_empty; // you must do this before you start - // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!) 
- key_type delkey; // which key marks deleted entries - value_type emptyval; // which key marks unused entries - float enlarge_resize_percent; // how full before resize - float shrink_resize_percent; // how empty before resize - size_type shrink_threshold; // num_buckets * shrink_resize_percent - size_type enlarge_threshold; // num_buckets * enlarge_resize_percent - value_type *table; - size_type num_buckets; - size_type num_elements; - int num_ht_copies; // a statistics counter incremented every Copy - bool consider_shrink; // true if we should try to shrink before next insert - - void reset_thresholds() { - enlarge_threshold = static_cast(num_buckets - * enlarge_resize_percent); - shrink_threshold = static_cast(num_buckets - * shrink_resize_percent); - consider_shrink = false; // whatever caused us to reset already considered - } -}; - -// We need a global swap as well -template -inline void swap(dense_hashtable &x, - dense_hashtable &y) { - x.swap(y); -} - -#undef JUMP_ - -template -const typename dense_hashtable::size_type -dense_hashtable::ILLEGAL_BUCKET; - -// How full we let the table get before we resize. Knuth says .8 is -// good -- higher causes us to probe too much, though saves memory. -// However, we go with .5, getting better performance at the cost of -// more space (a trade-off densehashtable explicitly chooses to make). -// Feel free to play around with different values, though. -template -const float dense_hashtable::HT_OCCUPANCY_FLT = 0.5f; - -// How empty we let the table get before we resize lower. 
-// It should be less than OCCUPANCY_FLT / 2 or we thrash resizing -template -const float dense_hashtable::HT_EMPTY_FLT - = 0.4f * dense_hashtable::HT_OCCUPANCY_FLT; - -_END_GOOGLE_NAMESPACE_ - -#endif /* _DENSEHASHTABLE_H_ */ diff --git a/src/sparsehash-1.6/src/google/sparsehash/sparseconfig.h b/src/sparsehash-1.6/src/google/sparsehash/sparseconfig.h deleted file mode 100644 index f397d3b..0000000 --- a/src/sparsehash-1.6/src/google/sparsehash/sparseconfig.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * NOTE: This file is for internal use only. - * Do not use these #defines in your own program! - */ - -/* Namespace for Google classes */ -#define GOOGLE_NAMESPACE ::google - -/* the location of the header defining hash functions */ -#define HASH_FUN_H - -/* the namespace of the hash<> function */ -#define HASH_NAMESPACE std::tr1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if the system has the type `long long'. */ -#define HAVE_LONG_LONG 1 - -/* Define to 1 if you have the `memcpy' function. */ -#define HAVE_MEMCPY 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if the system has the type `uint16_t'. */ -#define HAVE_UINT16_T 1 - -/* Define to 1 if the system has the type `u_int16_t'. */ -#define HAVE_U_INT16_T 1 - -/* Define to 1 if the system has the type `__uint16'. */ -/* #undef HAVE___UINT16 */ - -/* The system-provided hash function including the namespace. 
*/ -#define SPARSEHASH_HASH HASH_NAMESPACE::hash - -/* the namespace where STL code like vector<> is defined */ -#define STL_NAMESPACE std - -/* Stops putting the code inside the Google namespace */ -#define _END_GOOGLE_NAMESPACE_ } - -/* Puts following code inside the Google namespace */ -#define _START_GOOGLE_NAMESPACE_ namespace google { diff --git a/src/sparsehash-1.6/src/google/sparsehash/sparsehashtable.h b/src/sparsehash-1.6/src/google/sparsehash/sparsehashtable.h deleted file mode 100644 index 62c9dc8..0000000 --- a/src/sparsehash-1.6/src/google/sparsehash/sparsehashtable.h +++ /dev/null @@ -1,1081 +0,0 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// A sparse hashtable is a particular implementation of -// a hashtable: one that is meant to minimize memory use. -// It does this by using a *sparse table* (cf sparsetable.h), -// which uses between 1 and 2 bits to store empty buckets -// (we may need another bit for hashtables that support deletion). -// -// When empty buckets are so cheap, an appealing hashtable -// implementation is internal probing, in which the hashtable -// is a single table, and collisions are resolved by trying -// to insert again in another bucket. The most cache-efficient -// internal probing schemes are linear probing (which suffers, -// alas, from clumping) and quadratic probing, which is what -// we implement by default. -// -// Deleted buckets are a bit of a pain. We have to somehow mark -// deleted buckets (the probing must distinguish them from empty -// buckets). The most principled way is to have another bitmap, -// but that's annoying and takes up space. Instead we let the -// user specify an "impossible" key. We set deleted buckets -// to have the impossible key. -// -// Note it is possible to change the value of the delete key -// on the fly; you can even remove it, though after that point -// the hashtable is insert_only until you set it again. -// -// You probably shouldn't use this code directly. Use -// or instead. 
-// -// You can modify the following, below: -// HT_OCCUPANCY_FLT -- how full before we double size -// HT_EMPTY_FLT -- how empty before we halve size -// HT_MIN_BUCKETS -- smallest bucket size -// HT_DEFAULT_STARTING_BUCKETS -- default bucket size at construct-time -// -// You can also change enlarge_resize_percent (which defaults to -// HT_OCCUPANCY_FLT), and shrink_resize_percent (which defaults to -// HT_EMPTY_FLT) with set_resizing_parameters(). -// -// How to decide what values to use? -// shrink_resize_percent's default of .4 * OCCUPANCY_FLT, is probably good. -// HT_MIN_BUCKETS is probably unnecessary since you can specify -// (indirectly) the starting number of buckets at construct-time. -// For enlarge_resize_percent, you can use this chart to try to trade-off -// expected lookup time to the space taken up. By default, this -// code uses quadratic probing, though you can change it to linear -// via _JUMP below if you really want to. -// -// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html -// NUMBER OF PROBES / LOOKUP Successful Unsuccessful -// Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L) -// Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)2]/2 -// -// -- enlarge_resize_percent -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99 -// QUADRATIC COLLISION RES. -// probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11 -// probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6 -// LINEAR COLLISION RES. -// probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5 -// probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0 -// -// The value type is required to be copy constructible and default -// constructible, but it need not be (and commonly isn't) assignable. 
- -#ifndef _SPARSEHASHTABLE_H_ -#define _SPARSEHASHTABLE_H_ - -#ifndef SPARSEHASH_STAT_UPDATE -#define SPARSEHASH_STAT_UPDATE(x) ((void) 0) -#endif - -// The probing method -// Linear probing -// #define JUMP_(key, num_probes) ( 1 ) -// Quadratic-ish probing -#define JUMP_(key, num_probes) ( num_probes ) - - -#include -#include -#include // For swap(), eg -#include // For length_error -#include // for facts about iterator tags -#include // for pair<> -#include // Since that's basically what we are - -_START_GOOGLE_NAMESPACE_ - -using STL_NAMESPACE::pair; - -// Hashtable class, used to implement the hashed associative containers -// hash_set and hash_map. -// -// Value: what is stored in the table (each bucket is a Value). -// Key: something in a 1-to-1 correspondence to a Value, that can be used -// to search for a Value in the table (find() takes a Key). -// HashFcn: Takes a Key and returns an integer, the more unique the better. -// ExtractKey: given a Value, returns the unique Key associated with it. -// SetKey: given a Value* and a Key, modifies the value such that -// ExtractKey(value) == key. We guarantee this is only called -// with key == deleted_key. -// EqualKey: Given two Keys, says whether they are the same (that is, -// if they are both associated with the same Value). -// Alloc: STL allocator to use to allocate memory. Currently ignored. - -template -class sparse_hashtable; - -template -struct sparse_hashtable_iterator; - -template -struct sparse_hashtable_const_iterator; - -// As far as iterating, we're basically just a sparsetable -// that skips over deleted elements. 
-template -struct sparse_hashtable_iterator { - public: - typedef sparse_hashtable_iterator iterator; - typedef sparse_hashtable_const_iterator const_iterator; - typedef typename sparsetable::nonempty_iterator st_iterator; - - typedef STL_NAMESPACE::forward_iterator_tag iterator_category; - typedef V value_type; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef V& reference; // Value - typedef V* pointer; - - // "Real" constructor and default constructor - sparse_hashtable_iterator(const sparse_hashtable *h, - st_iterator it, st_iterator it_end) - : ht(h), pos(it), end(it_end) { advance_past_deleted(); } - sparse_hashtable_iterator() { } // not ever used internally - // The default destructor is fine; we don't define one - // The default operator= is fine; we don't define one - - // Happy dereferencer - reference operator*() const { return *pos; } - pointer operator->() const { return &(operator*()); } - - // Arithmetic. The only hard part is making sure that - // we're not on a marked-deleted array element - void advance_past_deleted() { - while ( pos != end && ht->test_deleted(*this) ) - ++pos; - } - iterator& operator++() { - assert(pos != end); ++pos; advance_past_deleted(); return *this; - } - iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } - - // Comparison. - bool operator==(const iterator& it) const { return pos == it.pos; } - bool operator!=(const iterator& it) const { return pos != it.pos; } - - - // The actual data - const sparse_hashtable *ht; - st_iterator pos, end; -}; - -// Now do it all again, but with const-ness! 
-template -struct sparse_hashtable_const_iterator { - public: - typedef sparse_hashtable_iterator iterator; - typedef sparse_hashtable_const_iterator const_iterator; - typedef typename sparsetable::const_nonempty_iterator st_iterator; - - typedef STL_NAMESPACE::forward_iterator_tag iterator_category; - typedef V value_type; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef const V& reference; // Value - typedef const V* pointer; - - // "Real" constructor and default constructor - sparse_hashtable_const_iterator(const sparse_hashtable *h, - st_iterator it, st_iterator it_end) - : ht(h), pos(it), end(it_end) { advance_past_deleted(); } - // This lets us convert regular iterators to const iterators - sparse_hashtable_const_iterator() { } // never used internally - sparse_hashtable_const_iterator(const iterator &it) - : ht(it.ht), pos(it.pos), end(it.end) { } - // The default destructor is fine; we don't define one - // The default operator= is fine; we don't define one - - // Happy dereferencer - reference operator*() const { return *pos; } - pointer operator->() const { return &(operator*()); } - - // Arithmetic. The only hard part is making sure that - // we're not on a marked-deleted array element - void advance_past_deleted() { - while ( pos != end && ht->test_deleted(*this) ) - ++pos; - } - const_iterator& operator++() { - assert(pos != end); ++pos; advance_past_deleted(); return *this; - } - const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; } - - // Comparison. 
- bool operator==(const const_iterator& it) const { return pos == it.pos; } - bool operator!=(const const_iterator& it) const { return pos != it.pos; } - - - // The actual data - const sparse_hashtable *ht; - st_iterator pos, end; -}; - -// And once again, but this time freeing up memory as we iterate -template -struct sparse_hashtable_destructive_iterator { - public: - typedef sparse_hashtable_destructive_iterator iterator; - typedef typename sparsetable::destructive_iterator st_iterator; - - typedef STL_NAMESPACE::forward_iterator_tag iterator_category; - typedef V value_type; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef V& reference; // Value - typedef V* pointer; - - // "Real" constructor and default constructor - sparse_hashtable_destructive_iterator(const - sparse_hashtable *h, - st_iterator it, st_iterator it_end) - : ht(h), pos(it), end(it_end) { advance_past_deleted(); } - sparse_hashtable_destructive_iterator() { } // never used internally - // The default destructor is fine; we don't define one - // The default operator= is fine; we don't define one - - // Happy dereferencer - reference operator*() const { return *pos; } - pointer operator->() const { return &(operator*()); } - - // Arithmetic. The only hard part is making sure that - // we're not on a marked-deleted array element - void advance_past_deleted() { - while ( pos != end && ht->test_deleted(*this) ) - ++pos; - } - iterator& operator++() { - assert(pos != end); ++pos; advance_past_deleted(); return *this; - } - iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } - - // Comparison. 
- bool operator==(const iterator& it) const { return pos == it.pos; } - bool operator!=(const iterator& it) const { return pos != it.pos; } - - - // The actual data - const sparse_hashtable *ht; - st_iterator pos, end; -}; - - -template -class sparse_hashtable { - public: - typedef Key key_type; - typedef Value value_type; - typedef HashFcn hasher; - typedef EqualKey key_equal; - - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef sparse_hashtable_iterator - iterator; - - typedef sparse_hashtable_const_iterator - const_iterator; - - typedef sparse_hashtable_destructive_iterator - destructive_iterator; - - // These come from tr1. For us they're the same as regular iterators. - typedef iterator local_iterator; - typedef const_iterator const_local_iterator; - - // How full we let the table get before we resize, by default. - // Knuth says .8 is good -- higher causes us to probe too much, - // though it saves memory. - static const float HT_OCCUPANCY_FLT; // = 0.8f; - - // How empty we let the table get before we resize lower, by default. - // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing - static const float HT_EMPTY_FLT; // = 0.4 * HT_OCCUPANCY_FLT; - - // Minimum size we're willing to let hashtables be. - // Must be a power of two, and at least 4. - // Note, however, that for a given hashtable, the minimum size is - // determined by the first constructor arg, and may be >HT_MIN_BUCKETS. - static const size_t HT_MIN_BUCKETS = 4; - - // By default, if you don't specify a hashtable size at - // construction-time, we use this size. Must be a power of two, and - // at least HT_MIN_BUCKETS. 
- static const size_t HT_DEFAULT_STARTING_BUCKETS = 32; - - // ITERATOR FUNCTIONS - iterator begin() { return iterator(this, table.nonempty_begin(), - table.nonempty_end()); } - iterator end() { return iterator(this, table.nonempty_end(), - table.nonempty_end()); } - const_iterator begin() const { return const_iterator(this, - table.nonempty_begin(), - table.nonempty_end()); } - const_iterator end() const { return const_iterator(this, - table.nonempty_end(), - table.nonempty_end()); } - - // These come from tr1 unordered_map. They iterate over 'bucket' n. - // For sparsehashtable, we could consider each 'group' to be a bucket, - // I guess, but I don't really see the point. We'll just consider - // bucket n to be the n-th element of the sparsetable, if it's occupied, - // or some empty element, otherwise. - local_iterator begin(size_type i) { - if (table.test(i)) - return local_iterator(this, table.get_iter(i), table.nonempty_end()); - else - return local_iterator(this, table.nonempty_end(), table.nonempty_end()); - } - local_iterator end(size_type i) { - local_iterator it = begin(i); - if (table.test(i) && !test_deleted(i)) - ++it; - return it; - } - const_local_iterator begin(size_type i) const { - if (table.test(i)) - return const_local_iterator(this, table.get_iter(i), - table.nonempty_end()); - else - return const_local_iterator(this, table.nonempty_end(), - table.nonempty_end()); - } - const_local_iterator end(size_type i) const { - const_local_iterator it = begin(i); - if (table.test(i) && !test_deleted(i)) - ++it; - return it; - } - - // This is used when resizing - destructive_iterator destructive_begin() { - return destructive_iterator(this, table.destructive_begin(), - table.destructive_end()); - } - destructive_iterator destructive_end() { - return destructive_iterator(this, table.destructive_end(), - table.destructive_end()); - } - - - // ACCESSOR FUNCTIONS for the things we templatize on, basically - hasher hash_funct() const { return hash; } - 
key_equal key_eq() const { return equals; } - - // Accessor function for statistics gathering. - int num_table_copies() const { return num_ht_copies; } - - private: - // We need to copy values when we set the special marker for deleted - // elements, but, annoyingly, we can't just use the copy assignment - // operator because value_type might not be assignable (it's often - // pair). We use explicit destructor invocation and - // placement new to get around this. Arg. - void set_value(value_type* dst, const value_type src) { - dst->~value_type(); // delete the old value, if any - new(dst) value_type(src); - } - - // This is used as a tag for the copy constructor, saying to destroy its - // arg We have two ways of destructively copying: with potentially growing - // the hashtable as we copy, and without. To make sure the outside world - // can't do a destructive copy, we make the typename private. - enum MoveDontCopyT {MoveDontCopy, MoveDontGrow}; - - - // DELETE HELPER FUNCTIONS - // This lets the user describe a key that will indicate deleted - // table entries. This key should be an "impossible" entry -- - // if you try to insert it for real, you won't be able to retrieve it! - // (NB: while you pass in an entire value, only the key part is looked - // at. This is just because I don't know how to assign just a key.) 
- private: - void squash_deleted() { // gets rid of any deleted entries we have - if ( num_deleted ) { // get rid of deleted before writing - sparse_hashtable tmp(MoveDontGrow, *this); - swap(tmp); // now we are tmp - } - assert(num_deleted == 0); - } - - public: - void set_deleted_key(const key_type &key) { - // It's only safe to change what "deleted" means if we purge deleted guys - squash_deleted(); - use_deleted = true; - delkey = key; - } - void clear_deleted_key() { - squash_deleted(); - use_deleted = false; - } - key_type deleted_key() const { - assert(use_deleted); - return delkey; - } - - // These are public so the iterators can use them - // True if the item at position bucknum is "deleted" marker - bool test_deleted(size_type bucknum) const { - // The num_deleted test is crucial for read(): after read(), the ht values - // are garbage, and we don't want to think some of them are deleted. - return (use_deleted && num_deleted > 0 && table.test(bucknum) && - equals(delkey, get_key(table.unsafe_get(bucknum)))); - } - bool test_deleted(const iterator &it) const { - return (use_deleted && num_deleted > 0 && - equals(delkey, get_key(*it))); - } - bool test_deleted(const const_iterator &it) const { - return (use_deleted && num_deleted > 0 && - equals(delkey, get_key(*it))); - } - bool test_deleted(const destructive_iterator &it) const { - return (use_deleted && num_deleted > 0 && - equals(delkey, get_key(*it))); - } - // Set it so test_deleted is true. true if object didn't used to be deleted. - bool set_deleted(iterator &it) { - assert(use_deleted); - bool retval = !test_deleted(it); - // &* converts from iterator to value-type. - set_key(&(*it), delkey); - return retval; - } - // Set it so test_deleted is false. true if object used to be deleted. - bool clear_deleted(iterator &it) { - assert(use_deleted); - // Happens automatically when we assign something else in its place. 
- return test_deleted(it); - } - - // We also allow to set/clear the deleted bit on a const iterator. - // We allow a const_iterator for the same reason you can delete a - // const pointer: it's convenient, and semantically you can't use - // 'it' after it's been deleted anyway, so its const-ness doesn't - // really matter. - bool set_deleted(const_iterator &it) { - assert(use_deleted); // bad if set_deleted_key() wasn't called - bool retval = !test_deleted(it); - set_key(const_cast(&(*it)), delkey); - return retval; - } - bool clear_deleted(const_iterator &it) { - assert(use_deleted); // bad if set_deleted_key() wasn't called - return test_deleted(it); - } - - - // FUNCTIONS CONCERNING SIZE - size_type size() const { return table.num_nonempty() - num_deleted; } - // Buckets are always a power of 2 - size_type max_size() const { return (size_type(-1) >> 1U) + 1; } - bool empty() const { return size() == 0; } - size_type bucket_count() const { return table.size(); } - size_type max_bucket_count() const { return max_size(); } - // These are tr1 methods. Their idea of 'bucket' doesn't map well to - // what we do. We just say every bucket has 0 or 1 items in it. - size_type bucket_size(size_type i) const { - return begin(i) == end(i) ? 
0 : 1; - } - - - private: - // Because of the above, size_type(-1) is never legal; use it for errors - static const size_type ILLEGAL_BUCKET = size_type(-1); - - private: - // This is the smallest size a hashtable can be without being too crowded - // If you like, you can give a min #buckets as well as a min #elts - size_type min_size(size_type num_elts, size_type min_buckets_wanted) { - size_type sz = HT_MIN_BUCKETS; // min buckets allowed - while ( sz < min_buckets_wanted || - num_elts >= static_cast(sz * enlarge_resize_percent) ) { - if (sz * 2 < sz) - throw std::length_error("resize overflow"); // protect against overflow - sz *= 2; - } - return sz; - } - - // Used after a string of deletes - void maybe_shrink() { - assert(table.num_nonempty() >= num_deleted); - assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two - assert(bucket_count() >= HT_MIN_BUCKETS); - - // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS, - // we'll never shrink until you get relatively big, and we'll never - // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something - // like "dense_hash_set x; x.insert(4); x.erase(4);" will - // shrink us down to HT_MIN_BUCKETS buckets, which is too small. - if (shrink_threshold > 0 && - (table.num_nonempty()-num_deleted) < shrink_threshold && - bucket_count() > HT_DEFAULT_STARTING_BUCKETS) { - size_type sz = bucket_count() / 2; // find how much we should shrink - while ( sz > HT_DEFAULT_STARTING_BUCKETS && - (table.num_nonempty() - num_deleted) <= sz * - shrink_resize_percent ) - sz /= 2; // stay a power of 2 - sparse_hashtable tmp(MoveDontCopy, *this, sz); - swap(tmp); // now we are tmp - } - consider_shrink = false; // because we just considered it - } - - // We'll let you resize a hashtable -- though this makes us copy all! 
- // When you resize, you say, "make it big enough for this many more elements" - void resize_delta(size_type delta) { - if ( consider_shrink ) // see if lots of deletes happened - maybe_shrink(); - if ( bucket_count() >= HT_MIN_BUCKETS && - (table.num_nonempty() + delta) <= enlarge_threshold ) - return; // we're ok as we are - - // Sometimes, we need to resize just to get rid of all the - // "deleted" buckets that are clogging up the hashtable. So when - // deciding whether to resize, count the deleted buckets (which - // are currently taking up room). But later, when we decide what - // size to resize to, *don't* count deleted buckets, since they - // get discarded during the resize. - const size_type needed_size = min_size(table.num_nonempty() + delta, 0); - if ( needed_size > bucket_count() ) { // we don't have enough buckets - size_type resize_to = min_size(table.num_nonempty() - num_deleted + delta, - bucket_count()); - if (resize_to < needed_size) { - // This situation means that we have enough deleted elements, - // that once we purge them, we won't actually have needed to - // grow. But we may want to grow anyway: if we just purge one - // element, say, we'll have to grow anyway next time we - // insert. Might as well grow now, since we're already going - // through the trouble of copying (in order to purge the - // deleted elements). - if (table.num_nonempty() - num_deleted + delta >= - static_cast(resize_to*2 * shrink_resize_percent)) { - // Good, we won't be below the shrink threshhold even if we double. 
- resize_to *= 2; - } - } - - sparse_hashtable tmp(MoveDontCopy, *this, resize_to); - swap(tmp); // now we are tmp - } - } - - // Used to actually do the rehashing when we grow/shrink a hashtable - void copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted) { - clear(); // clear table, set num_deleted to 0 - - // If we need to change the size of our table, do it now - const size_type resize_to = min_size(ht.size(), min_buckets_wanted); - if ( resize_to > bucket_count() ) { // we don't have enough buckets - table.resize(resize_to); // sets the number of buckets - reset_thresholds(); - } - - // We use a normal iterator to get non-deleted bcks from ht - // We could use insert() here, but since we know there are - // no duplicates and no deleted items, we can be more efficient - assert( (bucket_count() & (bucket_count()-1)) == 0); // a power of two - for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) { - size_type num_probes = 0; // how many times we've probed - size_type bucknum; - const size_type bucket_count_minus_one = bucket_count() - 1; - for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; - table.test(bucknum); // not empty - bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) { - ++num_probes; - assert(num_probes < bucket_count()); // or else the hashtable is full - } - table.set(bucknum, *it); // copies the value to here - } - num_ht_copies++; - } - - // Implementation is like copy_from, but it destroys the table of the - // "from" guy by freeing sparsetable memory as we iterate. This is - // useful in resizing, since we're throwing away the "from" guy anyway. 
- void move_from(MoveDontCopyT mover, sparse_hashtable &ht, - size_type min_buckets_wanted) { - clear(); // clear table, set num_deleted to 0 - - // If we need to change the size of our table, do it now - size_t resize_to; - if ( mover == MoveDontGrow ) - resize_to = ht.bucket_count(); // keep same size as old ht - else // MoveDontCopy - resize_to = min_size(ht.size(), min_buckets_wanted); - if ( resize_to > bucket_count() ) { // we don't have enough buckets - table.resize(resize_to); // sets the number of buckets - reset_thresholds(); - } - - // We use a normal iterator to get non-deleted bcks from ht - // We could use insert() here, but since we know there are - // no duplicates and no deleted items, we can be more efficient - assert( (bucket_count() & (bucket_count()-1)) == 0); // a power of two - // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM(): - for ( destructive_iterator it = ht.destructive_begin(); - it != ht.destructive_end(); ++it ) { - size_type num_probes = 0; // how many times we've probed - size_type bucknum; - for ( bucknum = hash(get_key(*it)) & (bucket_count()-1); // h % buck_cnt - table.test(bucknum); // not empty - bucknum = (bucknum + JUMP_(key, num_probes)) & (bucket_count()-1) ) { - ++num_probes; - assert(num_probes < bucket_count()); // or else the hashtable is full - } - table.set(bucknum, *it); // copies the value to here - } - num_ht_copies++; - } - - - // Required by the spec for hashed associative container - public: - // Though the docs say this should be num_buckets, I think it's much - // more useful as num_elements. As a special feature, calling with - // req_elements==0 will cause us to shrink if we can, saving space. 
- void resize(size_type req_elements) { // resize to this or larger - if ( consider_shrink || req_elements == 0 ) - maybe_shrink(); - if ( req_elements > table.num_nonempty() ) // we only grow - resize_delta(req_elements - table.num_nonempty()); - } - - // Get and change the value of shrink_resize_percent and - // enlarge_resize_percent. The description at the beginning of this - // file explains how to choose the values. Setting the shrink - // parameter to 0.0 ensures that the table never shrinks. - void get_resizing_parameters(float* shrink, float* grow) const { - *shrink = shrink_resize_percent; - *grow = enlarge_resize_percent; - } - void set_resizing_parameters(float shrink, float grow) { - assert(shrink >= 0.0); - assert(grow <= 1.0); - if (shrink > grow/2.0f) - shrink = grow / 2.0f; // otherwise we thrash hashtable size - shrink_resize_percent = shrink; - enlarge_resize_percent = grow; - reset_thresholds(); - } - - // CONSTRUCTORS -- as required by the specs, we take a size, - // but also let you specify a hashfunction, key comparator, - // and key extractor. We also define a copy constructor and =. - // DESTRUCTOR -- the default is fine, surprisingly. - explicit sparse_hashtable(size_type expected_max_items_in_table = 0, - const HashFcn& hf = HashFcn(), - const EqualKey& eql = EqualKey(), - const SetKey& set = SetKey(), - const ExtractKey& ext = ExtractKey()) - : hash(hf), equals(eql), get_key(ext), set_key(set), num_deleted(0), - use_deleted(false), delkey(), enlarge_resize_percent(HT_OCCUPANCY_FLT), - shrink_resize_percent(HT_EMPTY_FLT), - table(expected_max_items_in_table == 0 - ? HT_DEFAULT_STARTING_BUCKETS - : min_size(expected_max_items_in_table, 0)), - num_ht_copies(0) { - reset_thresholds(); - } - - // As a convenience for resize(), we allow an optional second argument - // which lets you make this new hashtable a different size than ht. - // We also provide a mechanism of saying you want to "move" the ht argument - // into us instead of copying. 
- sparse_hashtable(const sparse_hashtable& ht, - size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) - : hash(ht.hash), equals(ht.equals), - get_key(ht.get_key), set_key(ht.set_key), num_deleted(0), - use_deleted(ht.use_deleted), delkey(ht.delkey), - enlarge_resize_percent(ht.enlarge_resize_percent), - shrink_resize_percent(ht.shrink_resize_percent), - table(), num_ht_copies(ht.num_ht_copies) { - reset_thresholds(); - copy_from(ht, min_buckets_wanted); // copy_from() ignores deleted entries - } - sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht, - size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) - : hash(ht.hash), equals(ht.equals), get_key(ht.get_key), - num_deleted(0), use_deleted(ht.use_deleted), delkey(ht.delkey), - enlarge_resize_percent(ht.enlarge_resize_percent), - shrink_resize_percent(ht.shrink_resize_percent), - table(), num_ht_copies(ht.num_ht_copies) { - reset_thresholds(); - move_from(mover, ht, min_buckets_wanted); // ignores deleted entries - } - - sparse_hashtable& operator= (const sparse_hashtable& ht) { - if (&ht == this) return *this; // don't copy onto ourselves - hash = ht.hash; - equals = ht.equals; - get_key = ht.get_key; - set_key = ht.set_key; - use_deleted = ht.use_deleted; - delkey = ht.delkey; - copy_from(ht, HT_MIN_BUCKETS); // calls clear and sets num_deleted to 0 too - return *this; - } - - // Many STL algorithms use swap instead of copy constructors - void swap(sparse_hashtable& ht) { - STL_NAMESPACE::swap(hash, ht.hash); - STL_NAMESPACE::swap(equals, ht.equals); - STL_NAMESPACE::swap(get_key, ht.get_key); - STL_NAMESPACE::swap(set_key, ht.set_key); - STL_NAMESPACE::swap(num_deleted, ht.num_deleted); - STL_NAMESPACE::swap(use_deleted, ht.use_deleted); - STL_NAMESPACE::swap(enlarge_resize_percent, ht.enlarge_resize_percent); - STL_NAMESPACE::swap(shrink_resize_percent, ht.shrink_resize_percent); - STL_NAMESPACE::swap(delkey, ht.delkey); - table.swap(ht.table); - STL_NAMESPACE::swap(num_ht_copies, 
ht.num_ht_copies); - reset_thresholds(); - ht.reset_thresholds(); - } - - // It's always nice to be able to clear a table without deallocating it - void clear() { - if (!empty() || (num_deleted != 0)) { - table.clear(); - } - reset_thresholds(); - num_deleted = 0; - } - - - // LOOKUP ROUTINES - private: - // Returns a pair of positions: 1st where the object is, 2nd where - // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET - // if object is not found; 2nd is ILLEGAL_BUCKET if it is. - // Note: because of deletions where-to-insert is not trivial: it's the - // first deleted bucket we see, as long as we don't find the key later - pair find_position(const key_type &key) const { - size_type num_probes = 0; // how many times we've probed - const size_type bucket_count_minus_one = bucket_count() - 1; - size_type bucknum = hash(key) & bucket_count_minus_one; - size_type insert_pos = ILLEGAL_BUCKET; // where we would insert - SPARSEHASH_STAT_UPDATE(total_lookups += 1); - while ( 1 ) { // probe until something happens - if ( !table.test(bucknum) ) { // bucket is empty - SPARSEHASH_STAT_UPDATE(total_probes += num_probes); - if ( insert_pos == ILLEGAL_BUCKET ) // found no prior place to insert - return pair(ILLEGAL_BUCKET, bucknum); - else - return pair(ILLEGAL_BUCKET, insert_pos); - - } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert - if ( insert_pos == ILLEGAL_BUCKET ) - insert_pos = bucknum; - - } else if ( equals(key, get_key(table.unsafe_get(bucknum))) ) { - SPARSEHASH_STAT_UPDATE(total_probes += num_probes); - return pair(bucknum, ILLEGAL_BUCKET); - } - ++num_probes; // we're doing another probe - bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; - assert(num_probes < bucket_count()); // don't probe too many times! 
- } - } - - public: - iterator find(const key_type& key) { - if ( size() == 0 ) return end(); - pair pos = find_position(key); - if ( pos.first == ILLEGAL_BUCKET ) // alas, not there - return end(); - else - return iterator(this, table.get_iter(pos.first), table.nonempty_end()); - } - - const_iterator find(const key_type& key) const { - if ( size() == 0 ) return end(); - pair pos = find_position(key); - if ( pos.first == ILLEGAL_BUCKET ) // alas, not there - return end(); - else - return const_iterator(this, - table.get_iter(pos.first), table.nonempty_end()); - } - - // This is a tr1 method: the bucket a given key is in, or what bucket - // it would be put in, if it were to be inserted. Shrug. - size_type bucket(const key_type& key) const { - pair pos = find_position(key); - return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first; - } - - // Counts how many elements have key key. For maps, it's either 0 or 1. - size_type count(const key_type &key) const { - pair pos = find_position(key); - return pos.first == ILLEGAL_BUCKET ? 0 : 1; - } - - // Likewise, equal_range doesn't really make sense for us. Oh well. 
- pair equal_range(const key_type& key) { - iterator pos = find(key); // either an iterator or end - if (pos == end()) { - return pair(pos, pos); - } else { - const iterator startpos = pos++; - return pair(startpos, pos); - } - } - pair equal_range(const key_type& key) const { - const_iterator pos = find(key); // either an iterator or end - if (pos == end()) { - return pair(pos, pos); - } else { - const const_iterator startpos = pos++; - return pair(startpos, pos); - } - } - - - // INSERTION ROUTINES - private: - // If you know *this is big enough to hold obj, use this routine - pair insert_noresize(const value_type& obj) { - // First, double-check we're not inserting delkey - assert(!use_deleted || !equals(get_key(obj), delkey)); - const pair pos = find_position(get_key(obj)); - if ( pos.first != ILLEGAL_BUCKET) { // object was already there - return pair(iterator(this, table.get_iter(pos.first), - table.nonempty_end()), - false); // false: we didn't insert - } else { // pos.second says where to put it - if ( test_deleted(pos.second) ) { // just replace if it's been del. - // The set() below will undelete this object. 
We just worry about stats - assert(num_deleted > 0); - --num_deleted; // used to be, now it isn't - } - table.set(pos.second, obj); - return pair(iterator(this, table.get_iter(pos.second), - table.nonempty_end()), - true); // true: we did insert - } - } - - public: - // This is the normal insert routine, used by the outside world - pair insert(const value_type& obj) { - resize_delta(1); // adding an object, grow if need be - return insert_noresize(obj); - } - - // When inserting a lot at a time, we specialize on the type of iterator - template - void insert(InputIterator f, InputIterator l) { - // specializes on iterator type - insert(f, l, typename STL_NAMESPACE::iterator_traits::iterator_category()); - } - - // Iterator supports operator-, resize before inserting - template - void insert(ForwardIterator f, ForwardIterator l, - STL_NAMESPACE::forward_iterator_tag) { - size_type n = STL_NAMESPACE::distance(f, l); // TODO(csilvers): standard? - resize_delta(n); - for ( ; n > 0; --n, ++f) - insert_noresize(*f); - } - - // Arbitrary iterator, can't tell how much to resize - template - void insert(InputIterator f, InputIterator l, - STL_NAMESPACE::input_iterator_tag) { - for ( ; f != l; ++f) - insert(*f); - } - - - // DELETION ROUTINES - size_type erase(const key_type& key) { - // First, double-check we're not erasing delkey. - assert(!use_deleted || !equals(key, delkey)); - const_iterator pos = find(key); // shrug: shouldn't need to be const - if ( pos != end() ) { - assert(!test_deleted(pos)); // or find() shouldn't have returned it - set_deleted(pos); - ++num_deleted; - consider_shrink = true; // will think about shrink after next insert - return 1; // because we deleted one thing - } else { - return 0; // because we deleted nothing - } - } - - // We return the iterator past the deleted item. 
- void erase(iterator pos) { - if ( pos == end() ) return; // sanity check - if ( set_deleted(pos) ) { // true if object has been newly deleted - ++num_deleted; - consider_shrink = true; // will think about shrink after next insert - } - } - - void erase(iterator f, iterator l) { - for ( ; f != l; ++f) { - if ( set_deleted(f) ) // should always be true - ++num_deleted; - } - consider_shrink = true; // will think about shrink after next insert - } - - // We allow you to erase a const_iterator just like we allow you to - // erase an iterator. This is in parallel to 'delete': you can delete - // a const pointer just like a non-const pointer. The logic is that - // you can't use the object after it's erased anyway, so it doesn't matter - // if it's const or not. - void erase(const_iterator pos) { - if ( pos == end() ) return; // sanity check - if ( set_deleted(pos) ) { // true if object has been newly deleted - ++num_deleted; - consider_shrink = true; // will think about shrink after next insert - } - } - void erase(const_iterator f, const_iterator l) { - for ( ; f != l; ++f) { - if ( set_deleted(f) ) // should always be true - ++num_deleted; - } - consider_shrink = true; // will think about shrink after next insert - } - - - // COMPARISON - bool operator==(const sparse_hashtable& ht) const { - // We really want to check that the hash functions are the same - // but alas there's no way to do this. We just hope. - return ( num_deleted == ht.num_deleted && table == ht.table ); - } - bool operator!=(const sparse_hashtable& ht) const { - return !(*this == ht); - } - - - // I/O - // We support reading and writing hashtables to disk. NOTE that - // this only stores the hashtable metadata, not the stuff you've - // actually put in the hashtable! Alas, since I don't know how to - // write a hasher or key_equal, you have to make sure everything - // but the table is the same. We compact before writing. 
- bool write_metadata(FILE *fp) { - squash_deleted(); // so we don't have to worry about delkey - return table.write_metadata(fp); - } - - bool read_metadata(FILE *fp) { - num_deleted = 0; // since we got rid before writing - bool result = table.read_metadata(fp); - reset_thresholds(); - return result; - } - - // Only meaningful if value_type is a POD. - bool write_nopointer_data(FILE *fp) { - return table.write_nopointer_data(fp); - } - - // Only meaningful if value_type is a POD. - bool read_nopointer_data(FILE *fp) { - return table.read_nopointer_data(fp); - } - - private: - // The actual data - hasher hash; // required by hashed_associative_container - key_equal equals; - ExtractKey get_key; - SetKey set_key; - size_type num_deleted; // how many occupied buckets are marked deleted - bool use_deleted; // false until delkey has been set - // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!) - key_type delkey; // which key marks deleted entries - float enlarge_resize_percent; // how full before resize - float shrink_resize_percent; // how empty before resize - size_type shrink_threshold; // table.size() * shrink_resize_percent - size_type enlarge_threshold; // table.size() * enlarge_resize_percent - sparsetable table; // holds num_buckets and num_elements too - bool consider_shrink; // true if we should try to shrink before next insert - int num_ht_copies; // a statistics counter incremented every Copy/Move - - void reset_thresholds() { - enlarge_threshold = static_cast(table.size() - * enlarge_resize_percent); - shrink_threshold = static_cast(table.size() - * shrink_resize_percent); - consider_shrink = false; // whatever caused us to reset already considered - } -}; - -// We need a global swap as well -template -inline void swap(sparse_hashtable &x, - sparse_hashtable &y) { - x.swap(y); -} - -#undef JUMP_ - -template -const typename sparse_hashtable::size_type - sparse_hashtable::ILLEGAL_BUCKET; - -// How full we let the table get before we 
resize. Knuth says .8 is -// good -- higher causes us to probe too much, though saves memory -template -const float sparse_hashtable::HT_OCCUPANCY_FLT = 0.8f; - -// How empty we let the table get before we resize lower. -// It should be less than OCCUPANCY_FLT / 2 or we thrash resizing -template -const float sparse_hashtable::HT_EMPTY_FLT = 0.4f * -sparse_hashtable::HT_OCCUPANCY_FLT; - -_END_GOOGLE_NAMESPACE_ - -#endif /* _SPARSEHASHTABLE_H_ */ diff --git a/src/sparsehash-1.6/src/google/sparsetable b/src/sparsehash-1.6/src/google/sparsetable deleted file mode 100644 index 6dc0937..0000000 --- a/src/sparsehash-1.6/src/google/sparsetable +++ /dev/null @@ -1,1473 +0,0 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// A sparsetable is a random container that implements a sparse array, -// that is, an array that uses very little memory to store unassigned -// indices (in this case, between 1-2 bits per unassigned index). For -// instance, if you allocate an array of size 5 and assign a[2] = , then a[2] will take up a lot of memory but a[0], a[1], -// a[3], and a[4] will not. Array elements that have a value are -// called "assigned". Array elements that have no value yet, or have -// had their value cleared using erase() or clear(), are called -// "unassigned". -// -// Unassigned values seem to have the default value of T (see below). -// Nevertheless, there is a difference between an unassigned index and -// one explicitly assigned the value of T(). The latter is considered -// assigned. -// -// Access to an array element is constant time, as is insertion and -// deletion. Insertion and deletion may be fairly slow, however: -// because of this container's memory economy, each insert and delete -// causes a memory reallocation. -// -// See /usr/(local/)?doc/sparsehash-0.1/sparsetable.html -// for information about how to use this class. 
- -#ifndef _SPARSETABLE_H_ -#define _SPARSETABLE_H_ - -#include -#include // for malloc/free -#include // to read/write tables -#ifdef HAVE_STDINT_H -#include // the normal place uint16_t is defined -#endif -#ifdef HAVE_SYS_TYPES_H -#include // the normal place u_int16_t is defined -#endif -#ifdef HAVE_INTTYPES_H -#include // a third place for uint16_t or u_int16_t -#endif -#include // for bounds checking -#include // to define reverse_iterator for me -#include // equal, lexicographical_compare, swap,... -#include // uninitialized_copy -#include // a sparsetable is a vector of groups -#include // for true_type, integral_constant, etc. - -#if STDC_HEADERS -#include // for memcpy -#else -#if !HAVE_MEMCPY -#define memcpy(d, s, n) bcopy ((s), (d), (n)) -#endif -#endif - -_START_GOOGLE_NAMESPACE_ - -#ifndef HAVE_U_INT16_T -# if defined HAVE_UINT16_T - typedef uint16_t u_int16_t; // true on solaris, possibly other C99 libc's -# elif defined HAVE___UINT16 - typedef __int16 int16_t; // true on vc++7 - typedef unsigned __int16 u_int16_t; -# else - // Cannot find a 16-bit integer type. Hoping for the best with "short"... - typedef short int int16_t; - typedef unsigned short int u_int16_t; -# endif -#endif - -using STL_NAMESPACE::vector; -using STL_NAMESPACE::uninitialized_copy; - -// The smaller this is, the faster lookup is (because the group bitmap is -// smaller) and the faster insert is, because there's less to move. -// On the other hand, there are more groups. Since group::size_type is -// a short, this number should be of the form 32*x + 16 to avoid waste. -static const u_int16_t DEFAULT_SPARSEGROUP_SIZE = 48; // fits in 1.5 words - - -// A NOTE ON ASSIGNING: -// A sparse table does not actually allocate memory for entries -// that are not filled. 
Because of this, it becomes complicated -// to have a non-const iterator: we don't know, if the iterator points -// to a not-filled bucket, whether you plan to fill it with something -// or whether you plan to read its value (in which case you'll get -// the default bucket value). Therefore, while we can define const -// operations in a pretty 'normal' way, for non-const operations, we -// define something that returns a helper object with operator= and -// operator& that allocate a bucket lazily. We use this for table[] -// and also for regular table iterators. - -template -class table_element_adaptor { - public: - typedef typename tabletype::value_type value_type; - typedef typename tabletype::size_type size_type; - typedef typename tabletype::reference reference; - typedef typename tabletype::pointer pointer; - - table_element_adaptor(tabletype *tbl, size_type p) - : table(tbl), pos(p) { } - table_element_adaptor& operator= (const value_type &val) { - table->set(pos, val); - return *this; - } - operator value_type() { return table->get(pos); } // we look like a value - pointer operator& () { return &table->mutating_get(pos); } - - private: - tabletype* table; - size_type pos; -}; - -// Our iterator as simple as iterators can be: basically it's just -// the index into our table. Dereference, the only complicated -// thing, we punt to the table class. This just goes to show how -// much machinery STL requires to do even the most trivial tasks. -// -// By templatizing over tabletype, we have one iterator type which -// we can use for both sparsetables and sparsebins. In fact it -// works on any class that allows size() and operator[] (eg vector), -// as long as it does the standard STL typedefs too (eg value_type). 
- -template -class table_iterator { - public: - typedef table_iterator iterator; - - typedef STL_NAMESPACE::random_access_iterator_tag iterator_category; - typedef typename tabletype::value_type value_type; - typedef typename tabletype::difference_type difference_type; - typedef typename tabletype::size_type size_type; - typedef table_element_adaptor reference; - typedef table_element_adaptor* pointer; - - // The "real" constructor - table_iterator(tabletype *tbl, size_type p) - : table(tbl), pos(p) { } - // The default constructor, used when I define vars of type table::iterator - table_iterator() : table(NULL), pos(0) { } - // The copy constructor, for when I say table::iterator foo = tbl.begin() - // The default destructor is fine; we don't define one - // The default operator= is fine; we don't define one - - // The main thing our iterator does is dereference. If the table entry - // we point to is empty, we return the default value type. - // This is the big different function from the const iterator. - reference operator*() { - return table_element_adaptor(table, pos); - } - pointer operator->() { return &(operator*()); } - - // Helper function to assert things are ok; eg pos is still in range - void check() const { - assert(table); - assert(pos <= table->size()); - } - - // Arithmetic: we just do arithmetic on pos. We don't even need to - // do bounds checking, since STL doesn't consider that it's job. 
:-) - iterator& operator+=(size_type t) { pos += t; check(); return *this; } - iterator& operator-=(size_type t) { pos -= t; check(); return *this; } - iterator& operator++() { ++pos; check(); return *this; } - iterator& operator--() { --pos; check(); return *this; } - iterator operator++(int) { iterator tmp(*this); // for x++ - ++pos; check(); return tmp; } - iterator operator--(int) { iterator tmp(*this); // for x-- - --pos; check(); return tmp; } - iterator operator+(difference_type i) const { iterator tmp(*this); - tmp += i; return tmp; } - iterator operator-(difference_type i) const { iterator tmp(*this); - tmp -= i; return tmp; } - difference_type operator-(iterator it) const { // for "x = it2 - it" - assert(table == it.table); - return pos - it.pos; - } - reference operator[](difference_type n) const { - return *(*this + n); // simple though not totally efficient - } - - // Comparisons. - bool operator==(const iterator& it) const { - return table == it.table && pos == it.pos; - } - bool operator<(const iterator& it) const { - assert(table == it.table); // life is bad bad bad otherwise - return pos < it.pos; - } - bool operator!=(const iterator& it) const { return !(*this == it); } - bool operator<=(const iterator& it) const { return !(it < *this); } - bool operator>(const iterator& it) const { return it < *this; } - bool operator>=(const iterator& it) const { return !(*this < it); } - - // Here's the info we actually need to be an iterator - tabletype *table; // so we can dereference and bounds-check - size_type pos; // index into the table -}; - -// support for "3 + iterator" has to be defined outside the class, alas -template -table_iterator operator+(typename table_iterator::difference_type i, - table_iterator it) { - return it + i; // so people can say it2 = 3 + it -} - -template -class const_table_iterator { - public: - typedef table_iterator iterator; - typedef const_table_iterator const_iterator; - - typedef STL_NAMESPACE::random_access_iterator_tag 
iterator_category; - typedef typename tabletype::value_type value_type; - typedef typename tabletype::difference_type difference_type; - typedef typename tabletype::size_type size_type; - typedef typename tabletype::const_reference reference; // we're const-only - typedef typename tabletype::const_pointer pointer; - - // The "real" constructor - const_table_iterator(const tabletype *tbl, size_type p) - : table(tbl), pos(p) { } - // The default constructor, used when I define vars of type table::iterator - const_table_iterator() : table(NULL), pos(0) { } - // The copy constructor, for when I say table::iterator foo = tbl.begin() - // Also converts normal iterators to const iterators - const_table_iterator(const iterator &from) - : table(from.table), pos(from.pos) { } - // The default destructor is fine; we don't define one - // The default operator= is fine; we don't define one - - // The main thing our iterator does is dereference. If the table entry - // we point to is empty, we return the default value type. - reference operator*() const { return (*table)[pos]; } - pointer operator->() const { return &(operator*()); } - - // Helper function to assert things are ok; eg pos is still in range - void check() const { - assert(table); - assert(pos <= table->size()); - } - - // Arithmetic: we just do arithmetic on pos. We don't even need to - // do bounds checking, since STL doesn't consider that it's job. 
:-) - const_iterator& operator+=(size_type t) { pos += t; check(); return *this; } - const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; } - const_iterator& operator++() { ++pos; check(); return *this; } - const_iterator& operator--() { --pos; check(); return *this; } - const_iterator operator++(int) { const_iterator tmp(*this); // for x++ - ++pos; check(); return tmp; } - const_iterator operator--(int) { const_iterator tmp(*this); // for x-- - --pos; check(); return tmp; } - const_iterator operator+(difference_type i) const { const_iterator tmp(*this); - tmp += i; return tmp; } - const_iterator operator-(difference_type i) const { const_iterator tmp(*this); - tmp -= i; return tmp; } - difference_type operator-(const_iterator it) const { // for "x = it2 - it" - assert(table == it.table); - return pos - it.pos; - } - reference operator[](difference_type n) const { - return *(*this + n); // simple though not totally efficient - } - - // Comparisons. - bool operator==(const const_iterator& it) const { - return table == it.table && pos == it.pos; - } - bool operator<(const const_iterator& it) const { - assert(table == it.table); // life is bad bad bad otherwise - return pos < it.pos; - } - bool operator!=(const const_iterator& it) const { return !(*this == it); } - bool operator<=(const const_iterator& it) const { return !(it < *this); } - bool operator>(const const_iterator& it) const { return it < *this; } - bool operator>=(const const_iterator& it) const { return !(*this < it); } - - // Here's the info we actually need to be an iterator - const tabletype *table; // so we can dereference and bounds-check - size_type pos; // index into the table -}; - -// support for "3 + iterator" has to be defined outside the class, alas -template -const_table_iterator operator+(typename - const_table_iterator::difference_type i, - const_table_iterator it) { - return it + i; // so people can say it2 = 3 + it -} - - -// 
--------------------------------------------------------------------------- - - -/* -// This is a 2-D iterator. You specify a begin and end over a list -// of *containers*. We iterate over each container by iterating over -// it. It's actually simple: -// VECTOR.begin() VECTOR[0].begin() --------> VECTOR[0].end() ---, -// | ________________________________________________/ -// | \_> VECTOR[1].begin() --------> VECTOR[1].end() -, -// | ___________________________________________________/ -// v \_> ...... -// VECTOR.end() -// -// It's impossible to do random access on one of these things in constant -// time, so it's just a bidirectional iterator. -// -// Unfortunately, because we need to use this for a non-empty iterator, -// we use nonempty_begin() and nonempty_end() instead of begin() and end() -// (though only going across, not down). -*/ - -#define TWOD_BEGIN_ nonempty_begin -#define TWOD_END_ nonempty_end -#define TWOD_ITER_ nonempty_iterator -#define TWOD_CONST_ITER_ const_nonempty_iterator - -template -class two_d_iterator { - public: - typedef two_d_iterator iterator; - - typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category; - // apparently some versions of VC++ have trouble with two ::'s in a typename - typedef typename containertype::value_type _tmp_vt; - typedef typename _tmp_vt::value_type value_type; - typedef typename _tmp_vt::difference_type difference_type; - typedef typename _tmp_vt::reference reference; - typedef typename _tmp_vt::pointer pointer; - - // The "real" constructor. begin and end specify how many rows we have - // (in the diagram above); we always iterate over each row completely. 
- two_d_iterator(typename containertype::iterator begin, - typename containertype::iterator end, - typename containertype::iterator curr) - : row_begin(begin), row_end(end), row_current(curr), col_current() { - if ( row_current != row_end ) { - col_current = row_current->TWOD_BEGIN_(); - advance_past_end(); // in case cur->begin() == cur->end() - } - } - // If you want to start at an arbitrary place, you can, I guess - two_d_iterator(typename containertype::iterator begin, - typename containertype::iterator end, - typename containertype::iterator curr, - typename containertype::value_type::TWOD_ITER_ col) - : row_begin(begin), row_end(end), row_current(curr), col_current(col) { - advance_past_end(); // in case cur->begin() == cur->end() - } - // The default constructor, used when I define vars of type table::iterator - two_d_iterator() : row_begin(), row_end(), row_current(), col_current() { } - // The default destructor is fine; we don't define one - // The default operator= is fine; we don't define one - - // Happy dereferencer - reference operator*() const { return *col_current; } - pointer operator->() const { return &(operator*()); } - - // Arithmetic: we just do arithmetic on pos. We don't even need to - // do bounds checking, since STL doesn't consider that it's job. :-) - // NOTE: this is not amortized constant time! What do we do about it? - void advance_past_end() { // used when col_current points to end() - while ( col_current == row_current->TWOD_END_() ) { // end of current row - ++row_current; // go to beginning of next - if ( row_current != row_end ) // col is irrelevant at end - col_current = row_current->TWOD_BEGIN_(); - else - break; // don't go past row_end - } - } - - iterator& operator++() { - assert(row_current != row_end); // how to ++ from there? 
- ++col_current; - advance_past_end(); // in case col_current is at end() - return *this; - } - iterator& operator--() { - while ( row_current == row_end || - col_current == row_current->TWOD_BEGIN_() ) { - assert(row_current != row_begin); - --row_current; - col_current = row_current->TWOD_END_(); // this is 1 too far - } - --col_current; - return *this; - } - iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } - iterator operator--(int) { iterator tmp(*this); --*this; return tmp; } - - - // Comparisons. - bool operator==(const iterator& it) const { - return ( row_begin == it.row_begin && - row_end == it.row_end && - row_current == it.row_current && - (row_current == row_end || col_current == it.col_current) ); - } - bool operator!=(const iterator& it) const { return !(*this == it); } - - - // Here's the info we actually need to be an iterator - // These need to be public so we convert from iterator to const_iterator - typename containertype::iterator row_begin, row_end, row_current; - typename containertype::value_type::TWOD_ITER_ col_current; -}; - -// The same thing again, but this time const. 
:-( -template -class const_two_d_iterator { - public: - typedef const_two_d_iterator iterator; - - typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category; - // apparently some versions of VC++ have trouble with two ::'s in a typename - typedef typename containertype::value_type _tmp_vt; - typedef typename _tmp_vt::value_type value_type; - typedef typename _tmp_vt::difference_type difference_type; - typedef typename _tmp_vt::const_reference reference; - typedef typename _tmp_vt::const_pointer pointer; - - const_two_d_iterator(typename containertype::const_iterator begin, - typename containertype::const_iterator end, - typename containertype::const_iterator curr) - : row_begin(begin), row_end(end), row_current(curr), col_current() { - if ( curr != end ) { - col_current = curr->TWOD_BEGIN_(); - advance_past_end(); // in case cur->begin() == cur->end() - } - } - const_two_d_iterator(typename containertype::const_iterator begin, - typename containertype::const_iterator end, - typename containertype::const_iterator curr, - typename containertype::value_type::TWOD_CONST_ITER_ col) - : row_begin(begin), row_end(end), row_current(curr), col_current(col) { - advance_past_end(); // in case cur->begin() == cur->end() - } - const_two_d_iterator() - : row_begin(), row_end(), row_current(), col_current() { - } - // Need this explicitly so we can convert normal iterators to const iterators - const_two_d_iterator(const two_d_iterator& it) : - row_begin(it.row_begin), row_end(it.row_end), row_current(it.row_current), - col_current(it.col_current) { } - - typename containertype::const_iterator row_begin, row_end, row_current; - typename containertype::value_type::TWOD_CONST_ITER_ col_current; - - - // EVERYTHING FROM HERE DOWN IS THE SAME AS THE NON-CONST ITERATOR - reference operator*() const { return *col_current; } - pointer operator->() const { return &(operator*()); } - - void advance_past_end() { // used when col_current points to end() - while ( col_current == 
row_current->TWOD_END_() ) { // end of current row - ++row_current; // go to beginning of next - if ( row_current != row_end ) // col is irrelevant at end - col_current = row_current->TWOD_BEGIN_(); - else - break; // don't go past row_end - } - } - iterator& operator++() { - assert(row_current != row_end); // how to ++ from there? - ++col_current; - advance_past_end(); // in case col_current is at end() - return *this; - } - iterator& operator--() { - while ( row_current == row_end || - col_current == row_current->TWOD_BEGIN_() ) { - assert(row_current != row_begin); - --row_current; - col_current = row_current->TWOD_END_(); // this is 1 too far - } - --col_current; - return *this; - } - iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } - iterator operator--(int) { iterator tmp(*this); --*this; return tmp; } - - bool operator==(const iterator& it) const { - return ( row_begin == it.row_begin && - row_end == it.row_end && - row_current == it.row_current && - (row_current == row_end || col_current == it.col_current) ); - } - bool operator!=(const iterator& it) const { return !(*this == it); } -}; - -// We provide yet another version, to be as frugal with memory as -// possible. This one frees each block of memory as it finishes -// iterating over it. By the end, the entire table is freed. 
-// For understandable reasons, you can only iterate over it once, -// which is why it's an input iterator -template -class destructive_two_d_iterator { - public: - typedef destructive_two_d_iterator iterator; - - typedef STL_NAMESPACE::input_iterator_tag iterator_category; - // apparently some versions of VC++ have trouble with two ::'s in a typename - typedef typename containertype::value_type _tmp_vt; - typedef typename _tmp_vt::value_type value_type; - typedef typename _tmp_vt::difference_type difference_type; - typedef typename _tmp_vt::reference reference; - typedef typename _tmp_vt::pointer pointer; - - destructive_two_d_iterator(typename containertype::iterator begin, - typename containertype::iterator end, - typename containertype::iterator curr) - : row_begin(begin), row_end(end), row_current(curr), col_current() { - if ( curr != end ) { - col_current = curr->TWOD_BEGIN_(); - advance_past_end(); // in case cur->begin() == cur->end() - } - } - destructive_two_d_iterator(typename containertype::iterator begin, - typename containertype::iterator end, - typename containertype::iterator curr, - typename containertype::value_type::TWOD_ITER_ col) - : row_begin(begin), row_end(end), row_current(curr), col_current(col) { - advance_past_end(); // in case cur->begin() == cur->end() - } - destructive_two_d_iterator() - : row_begin(), row_end(), row_current(), col_current() { - } - - typename containertype::iterator row_begin, row_end, row_current; - typename containertype::value_type::TWOD_ITER_ col_current; - - // This is the part that destroys - void advance_past_end() { // used when col_current points to end() - while ( col_current == row_current->TWOD_END_() ) { // end of current row - row_current->clear(); // the destructive part - // It would be nice if we could decrement sparsetable->num_buckets here - ++row_current; // go to beginning of next - if ( row_current != row_end ) // col is irrelevant at end - col_current = row_current->TWOD_BEGIN_(); - else - 
break; // don't go past row_end - } - } - - // EVERYTHING FROM HERE DOWN IS THE SAME AS THE REGULAR ITERATOR - reference operator*() const { return *col_current; } - pointer operator->() const { return &(operator*()); } - - iterator& operator++() { - assert(row_current != row_end); // how to ++ from there? - ++col_current; - advance_past_end(); // in case col_current is at end() - return *this; - } - iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } - - bool operator==(const iterator& it) const { - return ( row_begin == it.row_begin && - row_end == it.row_end && - row_current == it.row_current && - (row_current == row_end || col_current == it.col_current) ); - } - bool operator!=(const iterator& it) const { return !(*this == it); } -}; - -#undef TWOD_BEGIN_ -#undef TWOD_END_ -#undef TWOD_ITER_ -#undef TWOD_CONST_ITER_ - - - - -// SPARSE-TABLE -// ------------ -// The idea is that a table with (logically) t buckets is divided -// into t/M *groups* of M buckets each. (M is a constant set in -// GROUP_SIZE for efficiency.) Each group is stored sparsely. -// Thus, inserting into the table causes some array to grow, which is -// slow but still constant time. Lookup involves doing a -// logical-position-to-sparse-position lookup, which is also slow but -// constant time. The larger M is, the slower these operations are -// but the less overhead (slightly). -// -// To store the sparse array, we store a bitmap B, where B[i] = 1 iff -// bucket i is non-empty. Then to look up bucket i we really look up -// array[# of 1s before i in B]. This is constant time for fixed M. -// -// Terminology: the position of an item in the overall table (from -// 1 .. t) is called its "location." The logical position in a group -// (from 1 .. M ) is called its "position." The actual location in -// the array (from 1 .. # of non-empty buckets in the group) is -// called its "offset." 
- -// The weird mod in the offset is entirely to quiet compiler warnings -// as is the cast to int after doing the "x mod 256" -#define PUT_(take_from, offset) do { \ - if (putc(static_cast(((take_from) >> ((offset) % (sizeof(take_from)*8)))\ - % 256), fp) \ - == EOF) \ - return false; \ -} while (0) - -#define GET_(add_to, offset) do { \ - if ((x=getc(fp)) == EOF) \ - return false; \ - else \ - add_to |= (static_cast(x) << ((offset) % (sizeof(add_to)*8))); \ -} while (0) - -template -class sparsegroup { - public: - // Basic types - typedef T value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef table_iterator > iterator; - typedef const_table_iterator > const_iterator; - typedef table_element_adaptor > element_adaptor; - typedef value_type &reference; - typedef const value_type &const_reference; - typedef u_int16_t size_type; // max # of buckets - typedef int16_t difference_type; - typedef STL_NAMESPACE::reverse_iterator const_reverse_iterator; - typedef STL_NAMESPACE::reverse_iterator reverse_iterator; - - // These are our special iterators, that go over non-empty buckets in a - // group. These aren't const-only because you can change non-empty bcks. 
- typedef pointer nonempty_iterator; - typedef const_pointer const_nonempty_iterator; - typedef STL_NAMESPACE::reverse_iterator reverse_nonempty_iterator; - typedef STL_NAMESPACE::reverse_iterator const_reverse_nonempty_iterator; - - // Iterator functions - iterator begin() { return iterator(this, 0); } - const_iterator begin() const { return const_iterator(this, 0); } - iterator end() { return iterator(this, size()); } - const_iterator end() const { return const_iterator(this, size()); } - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - - // We'll have versions for our special non-empty iterator too - nonempty_iterator nonempty_begin() { return group; } - const_nonempty_iterator nonempty_begin() const { return group; } - nonempty_iterator nonempty_end() { return group + num_buckets; } - const_nonempty_iterator nonempty_end() const { return group + num_buckets; } - reverse_nonempty_iterator nonempty_rbegin() { - return reverse_nonempty_iterator(nonempty_end()); - } - const_reverse_nonempty_iterator nonempty_rbegin() const { - return const_reverse_nonempty_iterator(nonempty_end()); - } - reverse_nonempty_iterator nonempty_rend() { - return reverse_nonempty_iterator(nonempty_begin()); - } - const_reverse_nonempty_iterator nonempty_rend() const { - return const_reverse_nonempty_iterator(nonempty_begin()); - } - - - // This gives us the "default" value to return for an empty bucket. - // We just use the default constructor on T, the template type - const_reference default_value() const { - static value_type defaultval = value_type(); - return defaultval; - } - - - private: - // We need to do all this bit manipulation, of course. 
ick - static size_type charbit(size_type i) { return i >> 3; } - static size_type modbit(size_type i) { return 1 << (i&7); } - int bmtest(size_type i) const { return bitmap[charbit(i)] & modbit(i); } - void bmset(size_type i) { bitmap[charbit(i)] |= modbit(i); } - void bmclear(size_type i) { bitmap[charbit(i)] &= ~modbit(i); } - - void* realloc_or_die(void* ptr, size_t num_bytes) { - void* retval = realloc(ptr, num_bytes); - if (retval == NULL) { - // We really should use PRIuS here, but I don't want to have to add - // a whole new configure option, with concomitant macro namespace - // pollution, just to print this (unlikely) error message. So I cast. - fprintf(stderr, "FATAL ERROR: failed to allocate %lu bytes for ptr %p", - static_cast(num_bytes), ptr); - exit(1); - } - return retval; - } - - value_type* allocate_group(size_t n) { - return static_cast(realloc_or_die(NULL, - n * sizeof(value_type))); - } - - void free_group() { - // Valid even for empty group, because NULL+0 is defined to be NULL - value_type* end_it = group + num_buckets; - for (value_type* p = group; p != end_it; ++p) - p->~value_type(); - free(group); - group = NULL; - } - - public: // get_iter() in sparsetable needs it - // We need a small function that tells us how many set bits there are - // in positions 0..i-1 of the bitmap. It uses a big table. - // We make it static so templates don't allocate lots of these tables. - // There are lots of ways to do this calculation (called 'popcount'). - // The 8-bit table lookup is one of the fastest, though this - // implementation suffers from not doing any loop unrolling. See, eg, - // http://www.dalkescientific.com/writings/diary/archive/2008/07/03/hakmem_and_other_popcounts.html - // http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/ - static size_type pos_to_offset(const unsigned char *bm, size_type pos) { - // We could make these ints. The tradeoff is size (eg does it overwhelm - // the cache?) 
vs efficiency in referencing sub-word-sized array elements - static const char bits_in[256] = { // # of bits set in one char - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, - }; - size_type retval = 0; - - // [Note: condition pos > 8 is an optimization; convince yourself we - // give exactly the same result as if we had pos >= 8 here instead.] - for ( ; pos > 8; pos -= 8 ) // bm[0..pos/8-1] - retval += bits_in[*bm++]; // chars we want *all* bits in - return retval + bits_in[*bm & ((1 << pos)-1)]; // the char that includes pos - } - - size_type pos_to_offset(size_type pos) const { // not static but still const - return pos_to_offset(bitmap, pos); - } - - - public: - // Constructors -- default and copy -- and destructor - sparsegroup() : group(0), num_buckets(0) { memset(bitmap, 0, sizeof(bitmap)); } - sparsegroup(const sparsegroup& x) : group(0), num_buckets(x.num_buckets) { - if ( num_buckets ) { - group = allocate_group(x.num_buckets); - uninitialized_copy(x.group, x.group + x.num_buckets, group); - } - memcpy(bitmap, x.bitmap, sizeof(bitmap)); - } - ~sparsegroup() { free_group(); } - - // Operator= is just like the copy constructor, I guess - // TODO(austern): Make this exception safe. 
Handle exceptions in value_type's - // copy constructor. - sparsegroup &operator=(const sparsegroup& x) { - if ( &x == this ) return *this; // x = x - if ( x.num_buckets == 0 ) { - free_group(); - } else { - value_type* p = allocate_group(x.num_buckets); - uninitialized_copy(x.group, x.group + x.num_buckets, p); - free_group(); - group = p; - } - memcpy(bitmap, x.bitmap, sizeof(bitmap)); - num_buckets = x.num_buckets; - return *this; - } - - // Many STL algorithms use swap instead of copy constructors - void swap(sparsegroup& x) { - STL_NAMESPACE::swap(group, x.group); - for ( int i = 0; i < sizeof(bitmap) / sizeof(*bitmap); ++i ) - STL_NAMESPACE::swap(bitmap[i], x.bitmap[i]); // swap not defined on arrays - STL_NAMESPACE::swap(num_buckets, x.num_buckets); - } - - // It's always nice to be able to clear a table without deallocating it - void clear() { - free_group(); - memset(bitmap, 0, sizeof(bitmap)); - num_buckets = 0; - } - - // Functions that tell you about size. Alas, these aren't so useful - // because our table is always fixed size. - size_type size() const { return GROUP_SIZE; } - size_type max_size() const { return GROUP_SIZE; } - bool empty() const { return false; } - // We also may want to know how many *used* buckets there are - size_type num_nonempty() const { return num_buckets; } - - - // get()/set() are explicitly const/non-const. You can use [] if - // you want something that can be either (potentially more expensive). - const_reference get(size_type i) const { - if ( bmtest(i) ) // bucket i is occupied - return group[pos_to_offset(bitmap, i)]; - else - return default_value(); // return the default reference - } - - // TODO(csilvers): make protected + friend - // This is used by sparse_hashtable to get an element from the table - // when we know it exists. 
- const_reference unsafe_get(size_type i) const { - assert(bmtest(i)); - return group[pos_to_offset(bitmap, i)]; - } - - // TODO(csilvers): make protected + friend - reference mutating_get(size_type i) { // fills bucket i before getting - if ( !bmtest(i) ) - set(i, default_value()); - return group[pos_to_offset(bitmap, i)]; - } - - // Syntactic sugar. It's easy to return a const reference. To - // return a non-const reference, we need to use the assigner adaptor. - const_reference operator[](size_type i) const { - return get(i); - } - - element_adaptor operator[](size_type i) { - return element_adaptor(this, i); - } - - private: - // Create space at group[offset], assuming value_type has trivial - // copy constructor and destructor. (Really, we want it to have - // "trivial move", because that's what realloc and memmove both do. - // But there's no way to capture that using type_traits, so we - // pretend that move(x, y) is equivalent to "x.~T(); new(x) T(y);" - // which is pretty much correct, if a bit conservative.) - void set_aux(size_type offset, true_type) { - group = (value_type *) - realloc_or_die(group, sizeof(*group) * (num_buckets+1)); - // This is equivalent to memmove(), but faster on my Intel P4, - // at least with gcc4.1 -O2 / glibc 2.3.6. - for (size_type i = num_buckets; i > offset; --i) - memcpy(group + i, group + i-1, sizeof(*group)); - } - - // Create space at group[offset], without special assumptions about value_type - void set_aux(size_type offset, false_type) { - // This is valid because 0 <= offset <= num_buckets - value_type* p = allocate_group(num_buckets + 1); - uninitialized_copy(group, group + offset, p); - uninitialized_copy(group + offset, group + num_buckets, p + offset + 1); - free_group(); - group = p; - } - - public: - // This returns a reference to the inserted item (which is a copy of val). - // TODO(austern): Make this exception safe: handle exceptions from - // value_type's copy constructor. 
- reference set(size_type i, const_reference val) { - size_type offset = pos_to_offset(bitmap, i); // where we'll find (or insert) - if ( bmtest(i) ) { - // Delete the old value, which we're replacing with the new one - group[offset].~value_type(); - } else { - typedef integral_constant::value && - has_trivial_destructor::value)> - realloc_and_memmove_ok; // we pretend mv(x,y) == "x.~T(); new(x) T(y)" - set_aux(offset, realloc_and_memmove_ok()); - ++num_buckets; - bmset(i); - } - // This does the actual inserting. Since we made the array using - // malloc, we use "placement new" to just call the constructor. - new(&group[offset]) value_type(val); - return group[offset]; - } - - // We let you see if a bucket is non-empty without retrieving it - bool test(size_type i) const { - return bmtest(i) ? true : false; // cast an int to a bool - } - bool test(iterator pos) const { - return bmtest(pos.pos) ? true : false; - } - - private: - // Shrink the array, assuming value_type has trivial copy - // constructor and destructor. (Really, we want it to have "trivial - // move", because that's what realloc and memmove both do. But - // there's no way to capture that using type_traits, so we pretend - // that move(x, y) is equivalent to ""x.~T(); new(x) T(y);" - // which is pretty much correct, if a bit conservative.) - void erase_aux(size_type offset, true_type) { - // This isn't technically necessary, since we know we have a - // trivial destructor, but is a cheap way to get a bit more safety. - group[offset].~value_type(); - // This is equivalent to memmove(), but faster on my Intel P4, - // at lesat with gcc4.1 -O2 / glibc 2.3.6. - assert(num_buckets > 0); - for (size_type i = offset; i < num_buckets-1; ++i) - memcpy(group + i, group + i+1, sizeof(*group)); // hopefully inlined! - group = (value_type *) - realloc_or_die(group, sizeof(*group) * (num_buckets-1)); - } - - // Shrink the array, without any special assumptions about value_type. 
- void erase_aux(size_type offset, false_type) { - // This is valid because 0 <= offset < num_buckets. Note the inequality. - value_type* p = allocate_group(num_buckets - 1); - uninitialized_copy(group, group + offset, p); - uninitialized_copy(group + offset + 1, group + num_buckets, p + offset); - free_group(); - group = p; - } - - public: - // This takes the specified elements out of the group. This is - // "undefining", rather than "clearing". - // TODO(austern): Make this exception safe: handle exceptions from - // value_type's copy constructor. - void erase(size_type i) { - if ( bmtest(i) ) { // trivial to erase empty bucket - size_type offset = pos_to_offset(bitmap,i); // where we'll find (or insert) - if ( num_buckets == 1 ) { - free_group(); - group = NULL; - } else { - typedef integral_constant::value && - has_trivial_destructor::value)> - realloc_and_memmove_ok; // pretend mv(x,y) == "x.~T(); new(x) T(y)" - erase_aux(offset, realloc_and_memmove_ok()); - } - --num_buckets; - bmclear(i); - } - } - - void erase(iterator pos) { - erase(pos.pos); - } - - void erase(iterator start_it, iterator end_it) { - // This could be more efficient, but to do so we'd need to make - // bmclear() clear a range of indices. Doesn't seem worth it. - for ( ; start_it != end_it; ++start_it ) - erase(start_it); - } - - - // I/O - // We support reading and writing groups to disk. We don't store - // the actual array contents (which we don't know how to store), - // just the bitmap and size. Meant to be used with table I/O. - // Returns true if all was ok - bool write_metadata(FILE *fp) const { - assert(sizeof(num_buckets) == 2); // we explicitly set to u_int16_t - PUT_(num_buckets, 8); - PUT_(num_buckets, 0); - if ( !fwrite(bitmap, sizeof(bitmap), 1, fp) ) return false; - return true; - } - - // Reading destroys the old group contents! 
Returns true if all was ok - bool read_metadata(FILE *fp) { - clear(); - - int x; // the GET_ macro requires an 'int x' to be defined - GET_(num_buckets, 8); - GET_(num_buckets, 0); - - if ( !fread(bitmap, sizeof(bitmap), 1, fp) ) return false; - - // We'll allocate the space, but we won't fill it: it will be - // left as uninitialized raw memory. - group = allocate_group(num_buckets); - return true; - } - - // If your keys and values are simple enough, we can write them - // to disk for you. "simple enough" means POD and no pointers. - // However, we don't try to normalize endianness - bool write_nopointer_data(FILE *fp) const { - for ( const_nonempty_iterator it = nonempty_begin(); - it != nonempty_end(); ++it ) { - if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false; - } - return true; - } - - // When reading, we have to override the potential const-ness of *it. - // Again, only meaningful if value_type is a POD. - bool read_nopointer_data(FILE *fp) { - for ( nonempty_iterator it = nonempty_begin(); - it != nonempty_end(); ++it ) { - if ( !fread(reinterpret_cast(&(*it)), sizeof(*it), 1, fp) ) - return false; - } - return true; - } - - // Comparisons. Note the comparisons are pretty arbitrary: we - // compare values of the first index that isn't equal (using default - // value for empty buckets). 
- bool operator==(const sparsegroup& x) const { - return ( num_buckets == x.num_buckets && - memcmp(bitmap, x.bitmap, sizeof(bitmap)) == 0 && - STL_NAMESPACE::equal(begin(), end(), x.begin()) ); // from algorithm - } - bool operator<(const sparsegroup& x) const { // also from algorithm - return STL_NAMESPACE::lexicographical_compare(begin(), end(), - x.begin(), x.end()); - } - bool operator!=(const sparsegroup& x) const { return !(*this == x); } - bool operator<=(const sparsegroup& x) const { return !(x < *this); } - bool operator>(const sparsegroup& x) const { return x < *this; } - bool operator>=(const sparsegroup& x) const { return !(*this < x); } - - private: - // The actual data - value_type *group; // (small) array of T's - unsigned char bitmap[(GROUP_SIZE-1)/8 + 1]; // fancy math is so we round up - size_type num_buckets; // limits GROUP_SIZE to 64K -}; - -// We need a global swap as well -template -inline void swap(sparsegroup &x, sparsegroup &y) { - x.swap(y); -} - -// --------------------------------------------------------------------------- - - -template -class sparsetable { - public: - // Basic types - typedef T value_type; // stolen from stl_vector.h - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef table_iterator > iterator; - typedef const_table_iterator > const_iterator; - typedef table_element_adaptor > element_adaptor; - typedef value_type &reference; - typedef const value_type &const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef STL_NAMESPACE::reverse_iterator const_reverse_iterator; - typedef STL_NAMESPACE::reverse_iterator reverse_iterator; - - // These are our special iterators, that go over non-empty buckets in a - // table. These aren't const only because you can change non-empty bcks. 
- typedef two_d_iterator< vector< sparsegroup > > - nonempty_iterator; - typedef const_two_d_iterator< vector< sparsegroup > > - const_nonempty_iterator; - typedef STL_NAMESPACE::reverse_iterator reverse_nonempty_iterator; - typedef STL_NAMESPACE::reverse_iterator const_reverse_nonempty_iterator; - // Another special iterator: it frees memory as it iterates (used to resize) - typedef destructive_two_d_iterator< vector< sparsegroup > > - destructive_iterator; - - // Iterator functions - iterator begin() { return iterator(this, 0); } - const_iterator begin() const { return const_iterator(this, 0); } - iterator end() { return iterator(this, size()); } - const_iterator end() const { return const_iterator(this, size()); } - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - - // Versions for our special non-empty iterator - nonempty_iterator nonempty_begin() { - return nonempty_iterator(groups.begin(), groups.end(), groups.begin()); - } - const_nonempty_iterator nonempty_begin() const { - return const_nonempty_iterator(groups.begin(),groups.end(), groups.begin()); - } - nonempty_iterator nonempty_end() { - return nonempty_iterator(groups.begin(), groups.end(), groups.end()); - } - const_nonempty_iterator nonempty_end() const { - return const_nonempty_iterator(groups.begin(), groups.end(), groups.end()); - } - reverse_nonempty_iterator nonempty_rbegin() { - return reverse_nonempty_iterator(nonempty_end()); - } - const_reverse_nonempty_iterator nonempty_rbegin() const { - return const_reverse_nonempty_iterator(nonempty_end()); - } - reverse_nonempty_iterator nonempty_rend() { - return reverse_nonempty_iterator(nonempty_begin()); - } - const_reverse_nonempty_iterator nonempty_rend() const { - return 
const_reverse_nonempty_iterator(nonempty_begin()); - } - destructive_iterator destructive_begin() { - return destructive_iterator(groups.begin(), groups.end(), groups.begin()); - } - destructive_iterator destructive_end() { - return destructive_iterator(groups.begin(), groups.end(), groups.end()); - } - - private: - typedef typename vector< sparsegroup >::reference - GroupsReference; - typedef typename - vector< sparsegroup >::const_reference - GroupsConstReference; - typedef typename vector< sparsegroup >::iterator - GroupsIterator; - typedef typename vector< sparsegroup >::const_iterator - GroupsConstIterator; - - // How to deal with the proper group - static size_type num_groups(size_type num) { // how many to hold num buckets - return num == 0 ? 0 : ((num-1) / GROUP_SIZE) + 1; - } - - u_int16_t pos_in_group(size_type i) const { - return static_cast(i % GROUP_SIZE); - } - size_type group_num(size_type i) const { - return i / GROUP_SIZE; - } - GroupsReference which_group(size_type i) { - return groups[group_num(i)]; - } - GroupsConstReference which_group(size_type i) const { - return groups[group_num(i)]; - } - - public: - // Constructors -- default, normal (when you specify size), and copy - sparsetable(size_type sz = 0) - : groups(num_groups(sz)), table_size(sz), num_buckets(0) { } - // We'll can get away with using the default copy constructor, - // and default destructor, and hence the default operator=. Huzzah! - - // Many STL algorithms use swap instead of copy constructors - void swap(sparsetable& x) { - STL_NAMESPACE::swap(groups, x.groups); - STL_NAMESPACE::swap(table_size, x.table_size); - STL_NAMESPACE::swap(num_buckets, x.num_buckets); - } - - // It's always nice to be able to clear a table without deallocating it - void clear() { - GroupsIterator group; - for ( group = groups.begin(); group != groups.end(); ++group ) { - group->clear(); - } - num_buckets = 0; - } - - // Functions that tell you about size. - // NOTE: empty() is non-intuitive! 
It does not tell you the number - // of not-empty buckets (use num_nonempty() for that). Instead - // it says whether you've allocated any buckets or not. - size_type size() const { return table_size; } - size_type max_size() const { return size_type(-1); } - bool empty() const { return table_size == 0; } - // We also may want to know how many *used* buckets there are - size_type num_nonempty() const { return num_buckets; } - - // OK, we'll let you resize one of these puppies - void resize(size_type new_size) { - groups.resize(num_groups(new_size)); - if ( new_size < table_size) { // lower num_buckets, clear last group - if ( pos_in_group(new_size) > 0 ) // need to clear inside last group - groups.back().erase(groups.back().begin() + pos_in_group(new_size), - groups.back().end()); - num_buckets = 0; // refigure # of used buckets - GroupsConstIterator group; - for ( group = groups.begin(); group != groups.end(); ++group ) - num_buckets += group->num_nonempty(); - } - table_size = new_size; - } - - - // We let you see if a bucket is non-empty without retrieving it - bool test(size_type i) const { - return which_group(i).test(pos_in_group(i)); - } - bool test(iterator pos) const { - return which_group(pos.pos).test(pos_in_group(pos.pos)); - } - bool test(const_iterator pos) const { - return which_group(pos.pos).test(pos_in_group(pos.pos)); - } - - // We only return const_references because it's really hard to - // return something settable for empty buckets. Use set() instead. - const_reference get(size_type i) const { - assert(i < table_size); - return which_group(i).get(pos_in_group(i)); - } - - // TODO(csilvers): make protected + friend - // This is used by sparse_hashtable to get an element from the table - // when we know it exists (because the caller has called test(i)). 
- const_reference unsafe_get(size_type i) const { - assert(i < table_size); - assert(test(i)); - return which_group(i).unsafe_get(pos_in_group(i)); - } - - // TODO(csilvers): make protected + friend element_adaptor - reference mutating_get(size_type i) { // fills bucket i before getting - assert(i < table_size); - size_type old_numbuckets = which_group(i).num_nonempty(); - reference retval = which_group(i).mutating_get(pos_in_group(i)); - num_buckets += which_group(i).num_nonempty() - old_numbuckets; - return retval; - } - - // Syntactic sugar. As in sparsegroup, the non-const version is harder - const_reference operator[](size_type i) const { - return get(i); - } - - element_adaptor operator[](size_type i) { - return element_adaptor(this, i); - } - - // Needed for hashtables, gets as a nonempty_iterator. Crashes for empty bcks - const_nonempty_iterator get_iter(size_type i) const { - assert(test(i)); // how can a nonempty_iterator point to an empty bucket? - return const_nonempty_iterator( - groups.begin(), groups.end(), - groups.begin() + group_num(i), - (groups[group_num(i)].nonempty_begin() + - groups[group_num(i)].pos_to_offset(pos_in_group(i)))); - } - // For nonempty we can return a non-const version - nonempty_iterator get_iter(size_type i) { - assert(test(i)); // how can a nonempty_iterator point to an empty bucket? 
- return nonempty_iterator( - groups.begin(), groups.end(), - groups.begin() + group_num(i), - (groups[group_num(i)].nonempty_begin() + - groups[group_num(i)].pos_to_offset(pos_in_group(i)))); - } - - - // This returns a reference to the inserted item (which is a copy of val) - // The trick is to figure out whether we're replacing or inserting anew - reference set(size_type i, const_reference val) { - assert(i < table_size); - size_type old_numbuckets = which_group(i).num_nonempty(); - reference retval = which_group(i).set(pos_in_group(i), val); - num_buckets += which_group(i).num_nonempty() - old_numbuckets; - return retval; - } - - // This takes the specified elements out of the table. This is - // "undefining", rather than "clearing". - void erase(size_type i) { - assert(i < table_size); - size_type old_numbuckets = which_group(i).num_nonempty(); - which_group(i).erase(pos_in_group(i)); - num_buckets += which_group(i).num_nonempty() - old_numbuckets; - } - - void erase(iterator pos) { - erase(pos.pos); - } - - void erase(iterator start_it, iterator end_it) { - // This could be more efficient, but then we'd need to figure - // out if we spanned groups or not. Doesn't seem worth it. - for ( ; start_it != end_it; ++start_it ) - erase(start_it); - } - - - // We support reading and writing tables to disk. We don't store - // the actual array contents (which we don't know how to store), - // just the groups and sizes. Returns true if all went ok. - - private: - // Every time the disk format changes, this should probably change too - static const unsigned long MAGIC_NUMBER = 0x24687531; - - // Old versions of this code write all data in 32 bits. We need to - // support these files as well as having support for 64-bit systems. - // So we use the following encoding scheme: for values < 2^32-1, we - // store in 4 bytes in big-endian order. For values > 2^32, we - // store 0xFFFFFFF followed by 8 bytes in big-endian order. 
This - // causes us to mis-read old-version code that stores exactly - // 0xFFFFFFF, but I don't think that is likely to have happened for - // these particular values. - static bool write_32_or_64(FILE* fp, size_type value) { - if ( value < 0xFFFFFFFFULL ) { // fits in 4 bytes - PUT_(value, 24); - PUT_(value, 16); - PUT_(value, 8); - PUT_(value, 0); - } else if ( value == 0xFFFFFFFFUL ) { // special case in 32bit systems - PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); // marker - PUT_(0, 0); PUT_(0, 0); PUT_(0, 0); PUT_(0, 0); - PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); - } else { - PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); // marker - PUT_(value, 56); - PUT_(value, 48); - PUT_(value, 40); - PUT_(value, 32); - PUT_(value, 24); - PUT_(value, 16); - PUT_(value, 8); - PUT_(value, 0); - } - return true; - } - - static bool read_32_or_64(FILE* fp, size_type *value) { // reads into value - size_type first4 = 0; - int x; - GET_(first4, 24); - GET_(first4, 16); - GET_(first4, 8); - GET_(first4, 0); - if ( first4 < 0xFFFFFFFFULL ) { - *value = first4; - } else { - GET_(*value, 56); - GET_(*value, 48); - GET_(*value, 40); - GET_(*value, 32); - GET_(*value, 24); - GET_(*value, 16); - GET_(*value, 8); - GET_(*value, 0); - } - return true; - } - - public: - bool write_metadata(FILE *fp) const { - if ( !write_32_or_64(fp, MAGIC_NUMBER) ) return false; - if ( !write_32_or_64(fp, table_size) ) return false; - if ( !write_32_or_64(fp, num_buckets) ) return false; - - GroupsConstIterator group; - for ( group = groups.begin(); group != groups.end(); ++group ) - if ( group->write_metadata(fp) == false ) return false; - return true; - } - - // Reading destroys the old table contents! Returns true if read ok. 
- bool read_metadata(FILE *fp) { - size_type magic_read = 0; - if ( !read_32_or_64(fp, &magic_read) ) return false; - if ( magic_read != MAGIC_NUMBER ) { - clear(); // just to be consistent - return false; - } - - if ( !read_32_or_64(fp, &table_size) ) return false; - if ( !read_32_or_64(fp, &num_buckets) ) return false; - - resize(table_size); // so the vector's sized ok - GroupsIterator group; - for ( group = groups.begin(); group != groups.end(); ++group ) - if ( group->read_metadata(fp) == false ) return false; - return true; - } - - // This code is identical to that for SparseGroup - // If your keys and values are simple enough, we can write them - // to disk for you. "simple enough" means no pointers. - // However, we don't try to normalize endianness - bool write_nopointer_data(FILE *fp) const { - for ( const_nonempty_iterator it = nonempty_begin(); - it != nonempty_end(); ++it ) { - if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false; - } - return true; - } - - // When reading, we have to override the potential const-ness of *it - bool read_nopointer_data(FILE *fp) { - for ( nonempty_iterator it = nonempty_begin(); - it != nonempty_end(); ++it ) { - if ( !fread(reinterpret_cast(&(*it)), sizeof(*it), 1, fp) ) - return false; - } - return true; - } - - // Comparisons. Note the comparisons are pretty arbitrary: we - // compare values of the first index that isn't equal (using default - // value for empty buckets). 
- bool operator==(const sparsetable& x) const { - return ( table_size == x.table_size && - num_buckets == x.num_buckets && - groups == x.groups ); - } - bool operator<(const sparsetable& x) const { // also from algobase.h - return STL_NAMESPACE::lexicographical_compare(begin(), end(), - x.begin(), x.end()); - } - bool operator!=(const sparsetable& x) const { return !(*this == x); } - bool operator<=(const sparsetable& x) const { return !(x < *this); } - bool operator>(const sparsetable& x) const { return x < *this; } - bool operator>=(const sparsetable& x) const { return !(*this < x); } - - - private: - // The actual data - vector< sparsegroup > groups; // our list of groups - size_type table_size; // how many buckets they want - size_type num_buckets; // number of non-empty buckets -}; - -// We need a global swap as well -template -inline void swap(sparsetable &x, sparsetable &y) { - x.swap(y); -} - -#undef GET_ -#undef PUT_ - -_END_GOOGLE_NAMESPACE_ - -#endif diff --git a/src/sparsehash-1.6/src/google/type_traits.h b/src/sparsehash-1.6/src/google/type_traits.h deleted file mode 100644 index 5f88133..0000000 --- a/src/sparsehash-1.6/src/google/type_traits.h +++ /dev/null @@ -1,250 +0,0 @@ -// Copyright (c) 2006, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// ---- -// Author: Matt Austern -// -// Define a small subset of tr1 type traits. The traits we define are: -// is_integral -// is_floating_point -// is_pointer -// is_reference -// is_pod -// has_trivial_constructor -// has_trivial_copy -// has_trivial_assign -// has_trivial_destructor -// remove_const -// remove_volatile -// remove_cv -// remove_reference -// remove_pointer -// is_convertible -// We can add more type traits as required. - -#ifndef BASE_TYPE_TRAITS_H_ -#define BASE_TYPE_TRAITS_H_ - -#include -#include // For pair - -_START_GOOGLE_NAMESPACE_ - -// integral_constant, defined in tr1, is a wrapper for an integer -// value. We don't really need this generality; we could get away -// with hardcoding the integer type to bool. We use the fully -// general integer_constant for compatibility with tr1. - -template -struct integral_constant { - static const T value = v; - typedef T value_type; - typedef integral_constant type; -}; - -template const T integral_constant::value; - -// Abbreviations: true_type and false_type are structs that represent -// boolean true and false values. 
-typedef integral_constant true_type; -typedef integral_constant false_type; - -// Types small_ and big_ are guaranteed such that sizeof(small_) < -// sizeof(big_) -typedef char small_; - -struct big_ { - char dummy[2]; -}; - -// is_integral is false except for the built-in integer types. -template struct is_integral : false_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -#if defined(_MSC_VER) -// wchar_t is not by default a distinct type from unsigned short in -// Microsoft C. -// See http://msdn2.microsoft.com/en-us/library/dh8che7s(VS.80).aspx -template<> struct is_integral<__wchar_t> : true_type { }; -#else -template<> struct is_integral : true_type { }; -#endif -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -#ifdef HAVE_LONG_LONG -template<> struct is_integral : true_type { }; -template<> struct is_integral : true_type { }; -#endif - - -// is_floating_point is false except for the built-in floating-point types. -template struct is_floating_point : false_type { }; -template<> struct is_floating_point : true_type { }; -template<> struct is_floating_point : true_type { }; -template<> struct is_floating_point : true_type { }; - - -// is_pointer is false except for pointer types. -template struct is_pointer : false_type { }; -template struct is_pointer : true_type { }; - - -// is_reference is false except for reference types. -template struct is_reference : false_type {}; -template struct is_reference : true_type {}; - - -// We can't get is_pod right without compiler help, so fail conservatively. 
-// We will assume it's false except for arithmetic types and pointers, -// and const versions thereof. Note that std::pair is not a POD. -template struct is_pod - : integral_constant::value || - is_floating_point::value || - is_pointer::value)> { }; -template struct is_pod : is_pod { }; - - -// We can't get has_trivial_constructor right without compiler help, so -// fail conservatively. We will assume it's false except for: (1) types -// for which is_pod is true. (2) std::pair of types with trivial -// constructors. (3) array of a type with a trivial constructor. -// (4) const versions thereof. -template struct has_trivial_constructor : is_pod { }; -template struct has_trivial_constructor > - : integral_constant::value && - has_trivial_constructor::value)> { }; -template struct has_trivial_constructor - : has_trivial_constructor { }; -template struct has_trivial_constructor - : has_trivial_constructor { }; - -// We can't get has_trivial_copy right without compiler help, so fail -// conservatively. We will assume it's false except for: (1) types -// for which is_pod is true. (2) std::pair of types with trivial copy -// constructors. (3) array of a type with a trivial copy constructor. -// (4) const versions thereof. -template struct has_trivial_copy : is_pod { }; -template struct has_trivial_copy > - : integral_constant::value && - has_trivial_copy::value)> { }; -template struct has_trivial_copy - : has_trivial_copy { }; -template struct has_trivial_copy : has_trivial_copy { }; - -// We can't get has_trivial_assign right without compiler help, so fail -// conservatively. We will assume it's false except for: (1) types -// for which is_pod is true. (2) std::pair of types with trivial copy -// constructors. (3) array of a type with a trivial assign constructor. 
-template struct has_trivial_assign : is_pod { }; -template struct has_trivial_assign > - : integral_constant::value && - has_trivial_assign::value)> { }; -template struct has_trivial_assign - : has_trivial_assign { }; - -// We can't get has_trivial_destructor right without compiler help, so -// fail conservatively. We will assume it's false except for: (1) types -// for which is_pod is true. (2) std::pair of types with trivial -// destructors. (3) array of a type with a trivial destructor. -// (4) const versions thereof. -template struct has_trivial_destructor : is_pod { }; -template struct has_trivial_destructor > - : integral_constant::value && - has_trivial_destructor::value)> { }; -template struct has_trivial_destructor - : has_trivial_destructor { }; -template struct has_trivial_destructor - : has_trivial_destructor { }; - -// Specified by TR1 [4.7.1] -template struct remove_const { typedef T type; }; -template struct remove_const { typedef T type; }; -template struct remove_volatile { typedef T type; }; -template struct remove_volatile { typedef T type; }; -template struct remove_cv { - typedef typename remove_const::type>::type type; -}; - - -// Specified by TR1 [4.7.2] -template struct remove_reference { typedef T type; }; -template struct remove_reference { typedef T type; }; - -// Specified by TR1 [4.7.4] Pointer modifications. -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { typedef T type; }; -template struct remove_pointer { - typedef T type; }; - -// Specified by TR1 [4.6] Relationships between types -#ifndef _MSC_VER -namespace internal { - -// This class is an implementation detail for is_convertible, and you -// don't need to know how it works to use is_convertible. For those -// who care: we declare two different functions, one whose argument is -// of type To and one with a variadic argument list. 
We give them -// return types of different size, so we can use sizeof to trick the -// compiler into telling us which function it would have chosen if we -// had called it with an argument of type From. See Alexandrescu's -// _Modern C++ Design_ for more details on this sort of trick. - -template -struct ConvertHelper { - static small_ Test(To); - static big_ Test(...); - static From Create(); -}; -} // namespace internal - -// Inherits from true_type if From is convertible to To, false_type otherwise. -template -struct is_convertible - : integral_constant::Test( - internal::ConvertHelper::Create())) - == sizeof(small_)> { -}; -#endif - -_END_GOOGLE_NAMESPACE_ - -#endif // BASE_TYPE_TRAITS_H_ diff --git a/src/sparsehash-1.6/src/hashtable_unittest.cc b/src/sparsehash-1.6/src/hashtable_unittest.cc deleted file mode 100644 index 3dcbfee..0000000 --- a/src/sparsehash-1.6/src/hashtable_unittest.cc +++ /dev/null @@ -1,1531 +0,0 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// --- -// Author: Craig Silverstein -// -// This tests -// This tests -// This tests -// This tests -// This tests -// This tests - -// Since {dense,sparse}hashtable is templatized, it's important that -// we test every function in every class in this file -- not just to -// see if it works, but even if it compiles. - -#include "config.h" -#include -#include // for stat() -#ifdef HAVE_UNISTD_H -#include // for unlink() -#endif -#include -#include // for silly random-number-seed generator -#include // for sqrt() -#include -#include -#include // for insert_iterator -#include -#include // for setprecision() -#include -#include // for std::length_error -#include HASH_FUN_H // defined in config.h -#include -#include -#include -#include -#include -#include -#include - -// Otherwise, VC++7 warns about size_t -> int in the cout logging lines -#ifdef _MSC_VER -#pragma warning(disable:4267) -#endif - -using GOOGLE_NAMESPACE::sparse_hash_map; -using GOOGLE_NAMESPACE::dense_hash_map; -using GOOGLE_NAMESPACE::sparse_hash_set; -using GOOGLE_NAMESPACE::dense_hash_set; -using GOOGLE_NAMESPACE::sparse_hashtable; -using GOOGLE_NAMESPACE::dense_hashtable; -using STL_NAMESPACE::map; -using STL_NAMESPACE::set; -using STL_NAMESPACE::pair; -using STL_NAMESPACE::make_pair; -using STL_NAMESPACE::string; -using STL_NAMESPACE::insert_iterator; -using STL_NAMESPACE::allocator; -using STL_NAMESPACE::equal_to; -using STL_NAMESPACE::ostream; - -#define LOGF 
STL_NAMESPACE::cout // where we log to; LOGF is a historical name - -#define CHECK(cond) do { \ - if (!(cond)) { \ - LOGF << "Test failed: " #cond "\n"; \ - exit(1); \ - } \ -} while (0) - -#define CHECK_EQ(a, b) CHECK((a) == (b)) -#define CHECK_LT(a, b) CHECK((a) < (b)) -#define CHECK_GE(a, b) CHECK((a) >= (b)) - -#ifndef _MSC_VER // windows defines its own version -static string TmpFile(const char* basename) { - return string("/tmp/") + basename; -} -#endif - -const char *words[] = {"Baffin\n", // in /usr/dict/words - "Boffin\n", // not in - "baffin\n", // not in - "genial\n", // last word in - "Aarhus\n", // first word alphabetically - "Zurich\n", // last word alphabetically - "Getty\n", -}; - -const char *nwords[] = {"Boffin\n", - "baffin\n", -}; - -const char *default_dict[] = {"Aarhus\n", - "aback\n", - "abandon\n", - "Baffin\n", - "baffle\n", - "bagged\n", - "congenial\n", - "genial\n", - "Getty\n", - "indiscreet\n", - "linens\n", - "pence\n", - "reassure\n", - "sequel\n", - "zoning\n", - "zoo\n", - "Zurich\n", -}; - -// Likewise, it's not standard to hash a string pre-tr1. 
Luckily, it is a char* -#ifdef HAVE_UNORDERED_MAP -typedef SPARSEHASH_HASH StrHash; -struct CharStarHash { - size_t operator()(const char* s) const { - return StrHash()(string(s)); - } - // These are used by MSVC: - bool operator()(const char* a, const char* b) const { - return strcmp(a, b) < 0; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are the defaults -}; -#else -typedef SPARSEHASH_HASH CharStarHash; -struct StrHash { - size_t operator()(const string& s) const { - return SPARSEHASH_HASH()(s.c_str()); - } - // These are used by MSVC: - bool operator()(const string& a, const string& b) const { - return a < b; - } - static const size_t bucket_size = 4; // These are required by MSVC - static const size_t min_buckets = 8; // 4 and 8 are the defaults -}; -#endif - -// Let us log the pairs that make up a hash_map -template -ostream& operator<<(ostream& s, const pair& p) { - s << "pair(" << p.first << ", " << p.second << ")"; - return s; -} - -struct strcmp_fnc { - bool operator()(const char* s1, const char* s2) const { - return ((s1 == 0 && s2 == 0) || - (s1 && s2 && *s1 == *s2 && strcmp(s1, s2) == 0)); - } -}; - -namespace { - -template -void set_empty_key(sparse_hashtable *ht, T val) { -} - -template -void set_empty_key(sparse_hash_set *ht, T val) { -} - -template -void set_empty_key(sparse_hash_map *ht, K val) { -} - -template -void set_empty_key(dense_hashtable *ht, T val) { - ht->set_empty_key(val); -} - -template -void set_empty_key(dense_hash_set *ht, T val) { - ht->set_empty_key(val); -} - -template -void set_empty_key(dense_hash_map *ht, K val) { - ht->set_empty_key(val); -} - -template -bool clear_no_resize(sparse_hashtable *ht) { - return false; -} - -template -bool clear_no_resize(sparse_hash_set *ht) { - return false; -} - -template -bool clear_no_resize(sparse_hash_map *ht) { - return false; -} - -template -bool clear_no_resize(dense_hashtable *ht) { - ht->clear_no_resize(); - 
return true; -} - -template -bool clear_no_resize(dense_hash_set *ht) { - ht->clear_no_resize(); - return true; -} - -template -bool clear_no_resize(dense_hash_map *ht) { - ht->clear_no_resize(); - return true; -} - -template -void insert(dense_hashtable *ht, T val) { - ht->insert(val); -} - -template -void insert(dense_hash_set *ht, T val) { - ht->insert(val); -} - -template -void insert(dense_hash_map *ht, K val) { - ht->insert(pair(val,V())); -} - -template -void insert(sparse_hashtable *ht, T val) { - ht->insert(val); -} - -template -void insert(sparse_hash_set *ht, T val) { - ht->insert(val); -} - -template -void insert(sparse_hash_map *ht, K val) { - ht->insert(pair(val,V())); -} - -template -void insert(HT *ht, Iterator begin, Iterator end) { - ht->insert(begin, end); -} - -// For hashtable's and hash_set's, the iterator insert works fine (and -// is used). But for the hash_map's, the iterator insert expects the -// iterators to point to pair's. So by looping over and calling insert -// on each element individually, the code below automatically expands -// into inserting a pair. -template -void insert(dense_hash_map *ht, Iterator begin, Iterator end) { - while (begin != end) { - insert(ht, *begin); - ++begin; - } -} - -template -void insert(sparse_hash_map *ht, Iterator begin, Iterator end) { - while (begin != end) { - insert(ht, *begin); - ++begin; - } -} - -// A version of insert that uses the insert_iterator. But insert_iterator -// isn't defined for the low level hashtable classes, so we just punt to insert. 
- -template -void iterator_insert(dense_hashtable* ht, T val, - insert_iterator >* ) { - ht->insert(val); -} - -template -void iterator_insert(dense_hash_set* , T val, - insert_iterator >* ii) { - *(*ii)++ = val; -} - -template -void iterator_insert(dense_hash_map* , K val, - insert_iterator >* ii) { - *(*ii)++ = pair(val,V()); -} - -template -void iterator_insert(sparse_hashtable* ht, T val, - insert_iterator >* ) { - ht->insert(val); -} - -template -void iterator_insert(sparse_hash_set* , T val, - insert_iterator >* ii) { - *(*ii)++ = val; -} - -template -void iterator_insert(sparse_hash_map *, K val, - insert_iterator >* ii) { - *(*ii)++ = pair(val,V()); -} - - -void write_item(FILE *fp, const char *val) { - fwrite(val, strlen(val), 1, fp); // \n serves to separate -} - -// The weird 'const' declarations are desired by the compiler. Yucko. -void write_item(FILE *fp, const pair &val) { - fwrite(val.first, strlen(val.first), 1, fp); -} - -void write_item(FILE *fp, const string &val) { - fwrite(val.data(), val.length(), 1, fp); // \n serves to separate -} - -// The weird 'const' declarations are desired by the compiler. Yucko. 
-void write_item(FILE *fp, const pair &val) { - fwrite(val.first.data(), val.first.length(), 1, fp); -} - -char* read_line(FILE* fp, char* line, int linesize) { - if ( fgets(line, linesize, fp) == NULL ) - return NULL; - // normalize windows files :-( - const size_t linelen = strlen(line); - if ( linelen >= 2 && line[linelen-2] == '\r' && line[linelen-1] == '\n' ) { - line[linelen-2] = '\n'; - line[linelen-1] = '\0'; - } - return line; -} - -void read_item(FILE *fp, char*const* val) { - char line[1024]; - read_line(fp, line, sizeof(line)); - char **p = const_cast(val); - *p = strdup(line); -} - -void read_item(FILE *fp, pair *val) { - char line[1024]; - read_line(fp, line, sizeof(line)); - char **p = const_cast(&val->first); - *p = strdup(line); -} - -void read_item(FILE *fp, const string* val) { - char line[1024]; - read_line(fp, line, sizeof(line)); - new(const_cast(val)) string(line); // need to use placement new -} - -void read_item(FILE *fp, pair *val) { - char line[1024]; - read_line(fp, line, sizeof(line)); - new(const_cast(&val->first)) string(line); -} - -void free_item(char*const* val) { - free(*val); -} - -void free_item(pair *val) { - free(val->first); -} - -int get_int_item(int int_item) { - return int_item; -} - -int get_int_item(pair val) { - return val.first; -} - -int getintkey(int i) { return i; } - -int getintkey(const pair &p) { return p.first; } - -} // end anonymous namespace - -// Performs tests where the hashtable's value type is assumed to be int. 
-template -void test_int() { - htint x; - htint y(1000); - htint z(64); - set_empty_key(&x, 0xefefef); - set_empty_key(&y, 0xefefef); - set_empty_key(&z, 0xefefef); - - CHECK(y.empty()); - insert(&y, 1); - CHECK(!y.empty()); - insert(&y, 11); - insert(&y, 111); - insert(&y, 1111); - insert(&y, 11111); - insert(&y, 111111); - insert(&y, 1111111); // 1M, more or less - insert(&y, 11111111); - insert(&y, 111111111); - insert(&y, 1111111111); // 1B, more or less - for ( int i = 0; i < 64; ++i ) - insert(&z, i); - // test the second half of the insert with an insert_iterator - insert_iterator insert_iter(z, z.begin()); - for ( int i = 32; i < 64; ++i ) - iterator_insert(&z, i, &insert_iter); - - // only perform the following CHECKs for - // dense{hashtable, _hash_set, _hash_map} - if (clear_no_resize(&x)) { - // make sure x has to increase its number of buckets - typename htint::size_type empty_bucket_count = x.bucket_count(); - int last_element = 0; - while (x.bucket_count() == empty_bucket_count) { - insert(&x, last_element); - ++last_element; - } - // if clear_no_resize is supported (i.e. htint is a - // dense{hashtable,_hash_set,_hash_map}), it should leave the bucket_count - // as is. 
- typename htint::size_type last_bucket_count = x.bucket_count(); - clear_no_resize(&x); - CHECK(last_bucket_count == x.bucket_count()); - CHECK(x.empty()); - LOGF << "x has " << x.bucket_count() << " buckets\n"; - LOGF << "x size " << x.size() << "\n"; - // when inserting the same number of elements again, no resize should be - // necessary - for (int i = 0; i < last_element; ++i) { - insert(&x, i); - CHECK(x.bucket_count() == last_bucket_count); - } - } - - for ( typename htint::const_iterator it = y.begin(); it != y.end(); ++it ) - LOGF << "y: " << get_int_item(*it) << "\n"; - z.insert(y.begin(), y.end()); - swap(y,z); - for ( typename htint::iterator it = y.begin(); it != y.end(); ++it ) - LOGF << "y+z: " << get_int_item(*it) << "\n"; - LOGF << "z has " << z.bucket_count() << " buckets\n"; - LOGF << "y has " << y.bucket_count() << " buckets\n"; - LOGF << "z size: " << z.size() << "\n"; - - for (int i = 0; i < 64; ++i) - CHECK(y.find(i) != y.end()); - - CHECK(z.size() == 10); - z.set_deleted_key(1010101010); // an unused value - CHECK(z.deleted_key() == 1010101010); - z.erase(11111); - CHECK(z.size() == 9); - insert(&z, 11111); // should retake deleted value - CHECK(z.size() == 10); - // Do the delete/insert again. 
Last time we probably resized; this time no - z.erase(11111); - insert(&z, 11111); // should retake deleted value - CHECK(z.size() == 10); - - z.erase(-11111); // shouldn't do anything - CHECK(z.size() == 10); - z.erase(1); - CHECK(z.size() == 9); - - typename htint::iterator itdel = z.find(1111); - pair itdel2 - = z.equal_range(1111); - CHECK(itdel2.first != z.end()); - CHECK(&*itdel2.first == &*itdel); // while we're here, check equal_range() - CHECK(itdel2.second == ++itdel2.first); - pair itdel3 - = const_cast(&z)->equal_range(1111); - CHECK(itdel3.first != z.end()); - CHECK(&*itdel3.first == &*itdel); - CHECK(itdel3.second == ++itdel3.first); - - z.erase(itdel); - CHECK(z.size() == 8); - itdel2 = z.equal_range(1111); - CHECK(itdel2.first == z.end()); - CHECK(itdel2.second == itdel2.first); - itdel3 = const_cast(&z)->equal_range(1111); - CHECK(itdel3.first == z.end()); - CHECK(itdel3.second == itdel3.first); - - itdel = z.find(2222); // should be end() - z.erase(itdel); // shouldn't do anything - CHECK(z.size() == 8); - for ( typename htint::const_iterator it = z.begin(); it != z.end(); ++it ) - LOGF << "y: " << get_int_item(*it) << "\n"; - z.set_deleted_key(1010101011); // a different unused value - CHECK(z.deleted_key() == 1010101011); - for ( typename htint::const_iterator it = z.begin(); it != z.end(); ++it ) - LOGF << "y: " << get_int_item(*it) << "\n"; - LOGF << "That's " << z.size() << " elements\n"; - z.erase(z.begin(), z.end()); - CHECK(z.empty()); - - y.clear(); - CHECK(y.empty()); - LOGF << "y has " << y.bucket_count() << " buckets\n"; -} - -// Performs tests where the hashtable's value type is assumed to be char*. -// The read_write parameters specifies whether the read/write tests -// should be performed. Note that densehashtable::write_metdata is not -// implemented, so we only do the read/write tests for the -// sparsehashtable varieties. 
-template -void test_charptr(bool read_write) { - ht w; - set_empty_key(&w, (char*) NULL); - insert(&w, const_cast(nwords), - const_cast(nwords) + sizeof(nwords) / sizeof(*nwords)); - LOGF << "w has " << w.size() << " items\n"; - CHECK(w.size() == 2); - CHECK(w == w); - - ht x; - set_empty_key(&x, (char*) NULL); - long dict_size = 1; // for size stats -- can't be 0 'cause of division - - map counts; - // Hash the dictionary - { - // automake says 'look for all data files in $srcdir.' OK. - string filestr = (string(getenv("srcdir") ? getenv("srcdir") : ".") + - "/src/words"); - const char* file = filestr.c_str(); - FILE *fp = fopen(file, "rb"); - if ( fp == NULL ) { - LOGF << "Can't open " << file << ", using small, built-in dict...\n"; - for (int i = 0; i < sizeof(default_dict)/sizeof(*default_dict); ++i) { - insert(&x, strdup(default_dict[i])); - counts[default_dict[i]] = 0; - } - } else { - char line[1024]; - while ( read_line(fp, line, sizeof(line)) ) { - insert(&x, strdup(line)); - counts[line] = 0; - } - LOGF << "Read " << x.size() << " words from " << file << "\n"; - fclose(fp); - struct stat buf; - stat(file, &buf); - dict_size = buf.st_size; - LOGF << "Size of " << file << ": " << buf.st_size << " bytes\n"; - } - for (char **word = const_cast(words); - word < const_cast(words) + sizeof(words) / sizeof(*words); - ++word ) { - if (x.find(*word) == x.end()) { - CHECK(w.find(*word) != w.end()); - } else { - CHECK(w.find(*word) == w.end()); - } - } - } - CHECK(counts.size() == x.size()); - - // Save the hashtable. - if (read_write) { - const string file_string = TmpFile(".hashtable_unittest_dicthash"); - const char* file = file_string.c_str(); - FILE *fp = fopen(file, "wb"); - if ( fp == NULL ) { - // maybe we can't write to /tmp/. 
Try the current directory - file = ".hashtable_unittest_dicthash"; - fp = fopen(file, "wb"); - } - if ( fp == NULL ) { - LOGF << "Can't open " << file << " skipping hashtable save...\n"; - } else { - x.write_metadata(fp); // this only writes meta-information - int write_count = 0; - for ( typename ht::iterator it = x.begin(); it != x.end(); ++it ) { - write_item(fp, *it); - free_item(&(*it)); - ++write_count; - } - LOGF << "Wrote " << write_count << " words to " << file << "\n"; - fclose(fp); - struct stat buf; - stat(file, &buf); - LOGF << "Size of " << file << ": " << buf.st_size << " bytes\n"; - LOGF << STL_NAMESPACE::setprecision(3) - << "Hashtable overhead " - << (buf.st_size - dict_size) * 100.0 / dict_size - << "% (" - << (buf.st_size - dict_size) * 8.0 / write_count - << " bits/entry)\n"; - x.clear(); - - // Load the hashtable - fp = fopen(file, "rb"); - if ( fp == NULL ) { - LOGF << "Can't open " << file << " skipping hashtable reload...\n"; - } else { - x.read_metadata(fp); // reads metainformation - LOGF << "Hashtable size: " << x.size() << "\n"; - int read_count = 0; - for ( typename ht::iterator it = x.begin(); it != x.end(); ++it ) { - read_item(fp, &(*it)); - ++read_count; - } - LOGF << "Read " << read_count << " words from " << file << "\n"; - fclose(fp); - unlink(file); - for ( char **word = const_cast(words); - word < const_cast(words) + sizeof(words) / sizeof(*words); - ++word ) { - if (x.find(*word) == x.end()) { - CHECK(w.find(*word) != w.end()); - } else { - CHECK(w.find(*word) == w.end()); - } - } - } - } - } - for ( typename ht::iterator it = x.begin(); it != x.end(); ++it ) { - free_item(&(*it)); - } -} - -// Perform tests where the hashtable's value type is assumed to -// be string. -// TODO(austern): factor out the bulk of test_charptr and test_string -// into a common function. 
-template -void test_string(bool read_write) { - ht w; - set_empty_key(&w, string("-*- empty key -*-")); - const int N = sizeof(nwords) / sizeof(*nwords); - string* nwords1 = new string[N]; - for (int i = 0; i < N; ++i) - nwords1[i] = nwords[i]; - insert(&w, nwords1, nwords1 + N); - delete[] nwords1; - LOGF << "w has " << w.size() << " items\n"; - CHECK(w.size() == 2); - CHECK(w == w); - - ht x; - set_empty_key(&x, string("-*- empty key -*-")); - long dict_size = 1; // for size stats -- can't be 0 'cause of division - - map counts; - // Hash the dictionary - { - // automake says 'look for all data files in $srcdir.' OK. - string filestr = (string(getenv("srcdir") ? getenv("srcdir") : ".") + - "/src/words"); - const char* file = filestr.c_str(); - FILE *fp = fopen(file, "rb"); - if ( fp == NULL ) { - LOGF << "Can't open " << file << ", using small, built-in dict...\n"; - for (int i = 0; i < sizeof(default_dict)/sizeof(*default_dict); ++i) { - insert(&x, string(default_dict[i])); - counts[default_dict[i]] = 0; - } - } else { - char line[1024]; - while ( fgets(line, sizeof(line), fp) ) { - insert(&x, string(line)); - counts[line] = 0; - } - LOGF << "Read " << x.size() << " words from " << file << "\n"; - fclose(fp); - struct stat buf; - stat(file, &buf); - dict_size = buf.st_size; - LOGF << "Size of " << file << ": " << buf.st_size << " bytes\n"; - } - for ( const char* const* word = words; - word < words + sizeof(words) / sizeof(*words); - ++word ) { - if (x.find(*word) == x.end()) { - CHECK(w.find(*word) != w.end()); - } else { - CHECK(w.find(*word) == w.end()); - } - } - } - CHECK(counts.size() == x.size()); - { - // verify that size() works correctly - int xcount = 0; - for ( typename ht::iterator it = x.begin(); it != x.end(); ++it ) { - ++xcount; - } - CHECK(x.size() == xcount); - } - - // Save the hashtable. 
- if (read_write) { - const string file_string = TmpFile(".hashtable_unittest_dicthash_str"); - const char* file = file_string.c_str(); - FILE *fp = fopen(file, "wb"); - if ( fp == NULL ) { - // maybe we can't write to /tmp/. Try the current directory - file = ".hashtable_unittest_dicthash_str"; - fp = fopen(file, "wb"); - } - if ( fp == NULL ) { - LOGF << "Can't open " << file << " skipping hashtable save...\n"; - } else { - x.write_metadata(fp); // this only writes meta-information - int write_count = 0; - for ( typename ht::iterator it = x.begin(); it != x.end(); ++it ) { - write_item(fp, *it); - ++write_count; - } - LOGF << "Wrote " << write_count << " words to " << file << "\n"; - fclose(fp); - struct stat buf; - stat(file, &buf); - LOGF << "Size of " << file << ": " << buf.st_size << " bytes\n"; - LOGF << STL_NAMESPACE::setprecision(3) - << "Hashtable overhead " - << (buf.st_size - dict_size) * 100.0 / dict_size - << "% (" - << (buf.st_size - dict_size) * 8.0 / write_count - << " bits/entry)\n"; - x.clear(); - - // Load the hashtable - fp = fopen(file, "rb"); - if ( fp == NULL ) { - LOGF << "Can't open " << file << " skipping hashtable reload...\n"; - } else { - x.read_metadata(fp); // reads metainformation - LOGF << "Hashtable size: " << x.size() << "\n"; - int count = 0; - for ( typename ht::iterator it = x.begin(); it != x.end(); ++it ) { - read_item(fp, &(*it)); - ++count; - } - LOGF << "Read " << count << " words from " << file << "\n"; - fclose(fp); - unlink(file); - for ( const char* const* word = words; - word < words + sizeof(words) / sizeof(*words); - ++word ) { - if (x.find(*word) == x.end()) { - CHECK(w.find(*word) != w.end()); - } else { - CHECK(w.find(*word) == w.end()); - } - } - } - } - } - - // ensure that destruction is done properly in clear_no_resize() - if (!clear_no_resize(&w)) w.clear(); -} - -// The read_write parameters specifies whether the read/write tests -// should be performed. 
Note that densehashtable::write_metdata is not -// implemented, so we only do the read/write tests for the -// sparsehashtable varieties. -template -void test(bool read_write) { - test_int(); - test_string(read_write); - test_charptr(read_write); -} - -// For data types with trivial copy-constructors and destructors, we -// should use an optimized routine for data-copying, that involves -// memmove. We test this by keeping count of how many times the -// copy-constructor is called; it should be much less with the -// optimized code. - -class Memmove { - public: - Memmove(): i_(0) {} - explicit Memmove(int i): i_(i) {} - Memmove(const Memmove& that) { - this->i_ = that.i_; - num_copies_++; - } - - int i_; - static int num_copies_; -}; -int Memmove::num_copies_ = 0; - - -// This is what tells the hashtable code it can use memmove for this class: -_START_GOOGLE_NAMESPACE_ -template<> struct has_trivial_copy : true_type { }; -template<> struct has_trivial_destructor : true_type { }; -_END_GOOGLE_NAMESPACE_ - -class NoMemmove { - public: - NoMemmove(): i_(0) {} - explicit NoMemmove(int i): i_(i) {} - NoMemmove(const NoMemmove& that) { - this->i_ = that.i_; - num_copies_++; - } - - int i_; - static int num_copies_; -}; -int NoMemmove::num_copies_ = 0; - -void TestSimpleDataTypeOptimizations() { - { - sparse_hash_map memmove; - sparse_hash_map nomemmove; - - Memmove::num_copies_ = 0; // reset - NoMemmove::num_copies_ = 0; // reset - for (int i = 10000; i > 0; i--) { - memmove[i] = Memmove(i); - } - for (int i = 10000; i > 0; i--) { - nomemmove[i] = NoMemmove(i); - } - LOGF << "sparse_hash_map copies for unoptimized/optimized cases: " - << NoMemmove::num_copies_ << "/" << Memmove::num_copies_ << "\n"; - CHECK(NoMemmove::num_copies_ > Memmove::num_copies_); - } - // Same should hold true for dense_hash_map - { - dense_hash_map memmove; - dense_hash_map nomemmove; - memmove.set_empty_key(0); - nomemmove.set_empty_key(0); - - Memmove::num_copies_ = 0; // reset - 
NoMemmove::num_copies_ = 0; // reset - for (int i = 10000; i > 0; i--) { - memmove[i] = Memmove(i); - } - for (int i = 10000; i > 0; i--) { - nomemmove[i] = NoMemmove(i); - } - LOGF << "dense_hash_map copies for unoptimized/optimized cases: " - << NoMemmove::num_copies_ << "/" << Memmove::num_copies_ << "\n"; - CHECK(NoMemmove::num_copies_ > Memmove::num_copies_); - } -} - -void TestShrinking() { - // We want to make sure that when we create a hashtable, and then - // add and delete one element, the size of the hashtable doesn't - // change. - { - sparse_hash_set s; - s.set_deleted_key(0); - const int old_bucket_count = s.bucket_count(); - s.insert(4); - s.erase(4); - s.insert(4); - s.erase(4); - CHECK_EQ(old_bucket_count, s.bucket_count()); - } - { - dense_hash_set s; - s.set_deleted_key(0); - s.set_empty_key(1); - const int old_bucket_count = s.bucket_count(); - s.insert(4); - s.erase(4); - s.insert(4); - s.erase(4); - CHECK_EQ(old_bucket_count, s.bucket_count()); - } - { - sparse_hash_set s(2); // start small: only expects 2 items - CHECK_LT(s.bucket_count(), 32); // verify we actually do start small - s.set_deleted_key(0); - const int old_bucket_count = s.bucket_count(); - s.insert(4); - s.erase(4); - s.insert(4); - s.erase(4); - CHECK_EQ(old_bucket_count, s.bucket_count()); - } - { - dense_hash_set s(2); // start small: only expects 2 items - CHECK_LT(s.bucket_count(), 32); // verify we actually do start small - s.set_deleted_key(0); - s.set_empty_key(1); - const int old_bucket_count = s.bucket_count(); - s.insert(4); - s.erase(4); - s.insert(4); - s.erase(4); - CHECK_EQ(old_bucket_count, s.bucket_count()); - } -} - -class TestHashFcn : public SPARSEHASH_HASH { - public: - explicit TestHashFcn(int i) - : id_(i) { - } - - int id() const { - return id_; - } - - private: - int id_; -}; - -class TestEqualTo : public equal_to { - public: - explicit TestEqualTo(int i) - : id_(i) { - } - - int id() const { - return id_; - } - - private: - int id_; -}; - -template