1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Optimized choice during collision resolution.

This commit is contained in:
Bruno Haible
2003-03-14 11:01:01 +00:00
parent 19c69d8e5a
commit 7a8b43182a
5 changed files with 136 additions and 13 deletions

View File

@@ -1,5 +1,19 @@
2002-12-07 Bruno Haible <bruno@clisp.org> 2002-12-07 Bruno Haible <bruno@clisp.org>
* src/options.h (OPT_CHOICE): New enum value.
* src/options.cc (Options::~Options): Update.
(long_options): New option --optimized-collision-resolution.
(Options::parse_options): Accept option -O.
* src/search.h (Search::sort_by_occurrence): Change argument to
'unsigned int'.
(Search::compute_occurrence, Search::sort_by_occurrence): New method
declarations.
* src/search.cc (Search::sort_by_occurrence): Change argument to
'unsigned int'.
(Search::compute_occurrence, Search::sort_by_occurrence): New methods.
(Search::find_asso_values): Implement OPT_CHOICE. More debugging
output.
* src/search.cc (Search::prepare_asso_values) [DEBUG]: Also print * src/search.cc (Search::prepare_asso_values) [DEBUG]: Also print
the keyword list in order. the keyword list in order.
(Search::find_asso_values) [DEBUG]: Upon failure, print the union_set. (Search::find_asso_values) [DEBUG]: Upon failure, print the union_set.

View File

@@ -473,6 +473,7 @@ Options::~Options ()
"\nENUM is........: %s" "\nENUM is........: %s"
"\nINCLUDE is.....: %s" "\nINCLUDE is.....: %s"
"\nSEVENBIT is....: %s" "\nSEVENBIT is....: %s"
"\nOPT_CHOICE is..: %s"
"\niterations = %d" "\niterations = %d"
"\nlookup function name = %s" "\nlookup function name = %s"
"\nhash function name = %s" "\nhash function name = %s"
@@ -505,6 +506,7 @@ Options::~Options ()
_option_word & ENUM ? "enabled" : "disabled", _option_word & ENUM ? "enabled" : "disabled",
_option_word & INCLUDE ? "enabled" : "disabled", _option_word & INCLUDE ? "enabled" : "disabled",
_option_word & SEVENBIT ? "enabled" : "disabled", _option_word & SEVENBIT ? "enabled" : "disabled",
_option_word & OPT_CHOICE ? "enabled" : "disabled",
_iterations, _iterations,
_function_name, _hash_name, _wordlist_name, _slot_name, _function_name, _hash_name, _wordlist_name, _slot_name,
_initializer_suffix, _asso_iterations, _jump, _size_multiple, _initializer_suffix, _asso_iterations, _jump, _size_multiple,
@@ -648,6 +650,7 @@ static const struct option long_options[] =
{ "multiple-iterations", required_argument, NULL, 'm' }, { "multiple-iterations", required_argument, NULL, 'm' },
{ "no-strlen", no_argument, NULL, 'n' }, { "no-strlen", no_argument, NULL, 'n' },
{ "occurrence-sort", no_argument, NULL, 'o' }, { "occurrence-sort", no_argument, NULL, 'o' },
{ "optimized-collision-resolution", no_argument, NULL, 'O' },
{ "random", no_argument, NULL, 'r' }, { "random", no_argument, NULL, 'r' },
{ "size-multiple", required_argument, NULL, 's' }, { "size-multiple", required_argument, NULL, 's' },
{ "help", no_argument, NULL, 'h' }, { "help", no_argument, NULL, 'h' },
@@ -667,7 +670,7 @@ Options::parse_options (int argc, char *argv[])
while ((option_char = while ((option_char =
getopt_long (_argument_count, _argument_vector, getopt_long (_argument_count, _argument_vector,
"acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oprs:S:tTvW:Z:7", "acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oOprs:S:tTvW:Z:7",
long_options, NULL)) long_options, NULL))
!= -1) != -1)
{ {
@@ -862,6 +865,11 @@ Options::parse_options (int argc, char *argv[])
_option_word |= ORDER; _option_word |= ORDER;
break; break;
} }
case 'O': /* Optimized choice during collision resolution. */
{
_option_word |= OPT_CHOICE;
break;
}
case 'p': /* Generated lookup function a pointer instead of int. */ case 'p': /* Generated lookup function a pointer instead of int. */
break; /* This is now the default. */ break; /* This is now the default. */
case 'r': /* Utilize randomness to initialize the associated values table. */ case 'r': /* Utilize randomness to initialize the associated values table. */

View File

@@ -101,7 +101,10 @@ enum Option_Type
INCLUDE = 1 << 20, INCLUDE = 1 << 20,
/* Assume 7-bit, not 8-bit, characters. */ /* Assume 7-bit, not 8-bit, characters. */
SEVENBIT = 1 << 21 SEVENBIT = 1 << 21,
/* Apply optimized collision resolution to speed-up search time. */
OPT_CHOICE = 1 << 22
}; };
/* Class manager for gperf program Options. */ /* Class manager for gperf program Options. */

View File

@@ -27,6 +27,7 @@
#include <stdlib.h> /* declares exit(), rand(), srand() */ #include <stdlib.h> /* declares exit(), rand(), srand() */
#include <string.h> /* declares memset(), memcmp() */ #include <string.h> /* declares memset(), memcmp() */
#include <time.h> /* declares time() */ #include <time.h> /* declares time() */
#include <math.h> /* declares exp() */
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */ #include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */
#include "options.h" #include "options.h"
#include "hash-table.h" #include "hash-table.h"
@@ -904,7 +905,7 @@ Search::compute_hash (KeywordExt *keyword) const
const unsigned int *p = keyword->_selchars; const unsigned int *p = keyword->_selchars;
int i = keyword->_selchars_length; int i = keyword->_selchars_length;
for (; i > 0; p++, i--) for (; i > 0; p++, i--)
sum += _asso_values[*p]; sum += _asso_values[*p];
return keyword->_hash_value = sum; return keyword->_hash_value = sum;
} }
@@ -962,20 +963,96 @@ compute_disjoint_union (const unsigned int *set_1, int size_1,
/* Sorts the given set in increasing frequency of _occurrences[]. */ /* Sorts the given set in increasing frequency of _occurrences[]. */
inline void inline void
Search::sort_by_occurrence (unsigned int *set, int len) const Search::sort_by_occurrence (unsigned int *set, unsigned int len) const
{ {
/* Use bubble sort, since the set is typically short. */ /* Use bubble sort, since the set is typically short. */
for (int i = 1; i < len; i++) for (unsigned int i = 1; i < len; i++)
{ {
int curr; unsigned int j;
unsigned int tmp; unsigned int tmp;
for (curr = i, tmp = set[curr]; for (j = i, tmp = set[j];
curr > 0 && _occurrences[tmp] < _occurrences[set[curr-1]]; j > 0 && _occurrences[tmp] < _occurrences[set[j-1]];
curr--) j--)
set[curr] = set[curr - 1]; set[j] = set[j - 1];
set[curr] = tmp; set[j] = tmp;
}
}
/* Computes the frequency of occurrence of a character among the keywords
up to the given keyword. */
inline unsigned int
Search::compute_occurrence (unsigned int c, KeywordExt *curr) const
{
unsigned int occurrence = 0;
for (KeywordExt_List *temp = _head; ; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
int m = keyword->_selchars_length;
for (int i = 0; i < m; i++)
if (keyword->_selchars[i] == c)
{
occurrence++;
break;
}
if (keyword == curr)
break;
}
return occurrence;
}
/* Sorts the given set in increasing frequency of occurrences among the
keywords up to the given keyword. */
inline void
Search::sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const
{
unsigned int occurrences[len];
for (unsigned int j = 0; j < len; j++)
occurrences[j] = 0;
for (KeywordExt_List *temp = _head; ; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
int m = keyword->_selchars_length;
for (unsigned int j = 0; j < len; j++)
{
unsigned int c = set[j];
for (int i = 0; i < m; i++)
if (keyword->_selchars[i] == c)
{
occurrences[j]++;
break;
}
}
if (keyword == curr)
break;
}
/* Use bubble sort, since the set is typically short. */
for (unsigned int i = 1; i < len; i++)
{
unsigned int j;
unsigned int set_tmp, occ_tmp;
for (j = i, set_tmp = set[j], occ_tmp = occurrences[j];
j > 0 && occ_tmp < occurrences[j-1];
j--)
{
set[j] = set[j - 1];
occurrences[j] = occurrences[j - 1];
}
set[j] = set_tmp;
occurrences[j] = occ_tmp;
} }
} }
@@ -1152,12 +1229,26 @@ Search::find_asso_values ()
/* Sort by decreasing occurrence: Try least-used characters c first. /* Sort by decreasing occurrence: Try least-used characters c first.
The idea is that this reduces the number of freshly introduced The idea is that this reduces the number of freshly introduced
collisions. */ collisions. */
sort_by_occurrence (union_set, union_set_length); if (option[OPT_CHOICE])
sort_by_occurrence (union_set, union_set_length, curr);
else
sort_by_occurrence (union_set, union_set_length);
for (union_index = 0; union_index < union_set_length; union_index++) for (union_index = 0; union_index < union_set_length; union_index++)
{ {
c = union_set[union_index]; c = union_set[union_index];
if (option[DEBUG])
{
unsigned int n = sp - stack + 1;
unsigned int o = compute_occurrence (c, curr);
fprintf (stderr, "Expected number of iterations between %g and %g\n",
exp (static_cast<double>((2*n-o)*o)
/ static_cast<double>(2*_max_hash_value)),
exp (static_cast<double>((2*n-o)*o)
/ static_cast<double>(2*_asso_value_max)));
}
/* Try various other values for _asso_values[c]. A value is /* Try various other values for _asso_values[c]. A value is
successful if, with it, the recomputed hash values for the successful if, with it, the recomputed hash values for the
keywords from _head->first() to curr - inclusive - give no keywords from _head->first() to curr - inclusive - give no

View File

@@ -90,8 +90,15 @@ private:
and stores it in keyword->_hash_value. */ and stores it in keyword->_hash_value. */
int compute_hash (KeywordExt *keyword) const; int compute_hash (KeywordExt *keyword) const;
/* Computes the frequency of occurrence of a character among the keywords
up to the given keyword. */
unsigned int compute_occurrence (unsigned int c, KeywordExt *curr) const;
/* Sorts the given set in increasing frequency of _occurrences[]. */ /* Sorts the given set in increasing frequency of _occurrences[]. */
void sort_by_occurrence (unsigned int *set, int len) const; void sort_by_occurrence (unsigned int *set, unsigned int len) const;
/* Sorts the given set in increasing frequency of occurrences among the
keywords up to the given keyword. */
void sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const;
bool has_collisions (KeywordExt *curr); bool has_collisions (KeywordExt *curr);