mirror of
https://git.savannah.gnu.org/git/gperf.git
synced 2025-12-02 13:09:22 +00:00
Optimized choice during collision resolution.
This commit is contained in:
14
ChangeLog
14
ChangeLog
@@ -1,5 +1,19 @@
|
|||||||
2002-12-07 Bruno Haible <bruno@clisp.org>
|
2002-12-07 Bruno Haible <bruno@clisp.org>
|
||||||
|
|
||||||
|
* src/options.h (OPT_CHOICE): New enum value.
|
||||||
|
* src/options.cc (Options::~Options): Update.
|
||||||
|
(long_options): New option --optimized-collision-resolution.
|
||||||
|
(Options::parse_options): Accept option -O.
|
||||||
|
* src/search.h (Search::sort_by_occurrence): Change argument to
|
||||||
|
'unsigned int'.
|
||||||
|
(Search::compute_occurrence, Search::sort_by_occurrence): New method
|
||||||
|
declarations.
|
||||||
|
* src/search.cc (Search::sort_by_occurrence): Change argument to
|
||||||
|
'unsigned int'.
|
||||||
|
(Search::compute_occurrence, Search::sort_by_occurrence): New methods.
|
||||||
|
(Search::find_asso_values): Implement OPT_CHOICE. More debugging
|
||||||
|
output.
|
||||||
|
|
||||||
* src/search.cc (Search::prepare_asso_values) [DEBUG]: Also print
|
* src/search.cc (Search::prepare_asso_values) [DEBUG]: Also print
|
||||||
the keyword list in order.
|
the keyword list in order.
|
||||||
(Search::find_asso_values) [DEBUG]: Upon failure, print the union_set.
|
(Search::find_asso_values) [DEBUG]: Upon failure, print the union_set.
|
||||||
|
|||||||
@@ -473,6 +473,7 @@ Options::~Options ()
|
|||||||
"\nENUM is........: %s"
|
"\nENUM is........: %s"
|
||||||
"\nINCLUDE is.....: %s"
|
"\nINCLUDE is.....: %s"
|
||||||
"\nSEVENBIT is....: %s"
|
"\nSEVENBIT is....: %s"
|
||||||
|
"\nOPT_CHOICE is..: %s"
|
||||||
"\niterations = %d"
|
"\niterations = %d"
|
||||||
"\nlookup function name = %s"
|
"\nlookup function name = %s"
|
||||||
"\nhash function name = %s"
|
"\nhash function name = %s"
|
||||||
@@ -505,6 +506,7 @@ Options::~Options ()
|
|||||||
_option_word & ENUM ? "enabled" : "disabled",
|
_option_word & ENUM ? "enabled" : "disabled",
|
||||||
_option_word & INCLUDE ? "enabled" : "disabled",
|
_option_word & INCLUDE ? "enabled" : "disabled",
|
||||||
_option_word & SEVENBIT ? "enabled" : "disabled",
|
_option_word & SEVENBIT ? "enabled" : "disabled",
|
||||||
|
_option_word & OPT_CHOICE ? "enabled" : "disabled",
|
||||||
_iterations,
|
_iterations,
|
||||||
_function_name, _hash_name, _wordlist_name, _slot_name,
|
_function_name, _hash_name, _wordlist_name, _slot_name,
|
||||||
_initializer_suffix, _asso_iterations, _jump, _size_multiple,
|
_initializer_suffix, _asso_iterations, _jump, _size_multiple,
|
||||||
@@ -648,6 +650,7 @@ static const struct option long_options[] =
|
|||||||
{ "multiple-iterations", required_argument, NULL, 'm' },
|
{ "multiple-iterations", required_argument, NULL, 'm' },
|
||||||
{ "no-strlen", no_argument, NULL, 'n' },
|
{ "no-strlen", no_argument, NULL, 'n' },
|
||||||
{ "occurrence-sort", no_argument, NULL, 'o' },
|
{ "occurrence-sort", no_argument, NULL, 'o' },
|
||||||
|
{ "optimized-collision-resolution", no_argument, NULL, 'O' },
|
||||||
{ "random", no_argument, NULL, 'r' },
|
{ "random", no_argument, NULL, 'r' },
|
||||||
{ "size-multiple", required_argument, NULL, 's' },
|
{ "size-multiple", required_argument, NULL, 's' },
|
||||||
{ "help", no_argument, NULL, 'h' },
|
{ "help", no_argument, NULL, 'h' },
|
||||||
@@ -667,7 +670,7 @@ Options::parse_options (int argc, char *argv[])
|
|||||||
|
|
||||||
while ((option_char =
|
while ((option_char =
|
||||||
getopt_long (_argument_count, _argument_vector,
|
getopt_long (_argument_count, _argument_vector,
|
||||||
"acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oprs:S:tTvW:Z:7",
|
"acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oOprs:S:tTvW:Z:7",
|
||||||
long_options, NULL))
|
long_options, NULL))
|
||||||
!= -1)
|
!= -1)
|
||||||
{
|
{
|
||||||
@@ -862,6 +865,11 @@ Options::parse_options (int argc, char *argv[])
|
|||||||
_option_word |= ORDER;
|
_option_word |= ORDER;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case 'O': /* Optimized choice during collision resolution. */
|
||||||
|
{
|
||||||
|
_option_word |= OPT_CHOICE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case 'p': /* Generated lookup function a pointer instead of int. */
|
case 'p': /* Generated lookup function a pointer instead of int. */
|
||||||
break; /* This is now the default. */
|
break; /* This is now the default. */
|
||||||
case 'r': /* Utilize randomness to initialize the associated values table. */
|
case 'r': /* Utilize randomness to initialize the associated values table. */
|
||||||
|
|||||||
@@ -101,7 +101,10 @@ enum Option_Type
|
|||||||
INCLUDE = 1 << 20,
|
INCLUDE = 1 << 20,
|
||||||
|
|
||||||
/* Assume 7-bit, not 8-bit, characters. */
|
/* Assume 7-bit, not 8-bit, characters. */
|
||||||
SEVENBIT = 1 << 21
|
SEVENBIT = 1 << 21,
|
||||||
|
|
||||||
|
/* Apply optimized collision resolution to speed-up search time. */
|
||||||
|
OPT_CHOICE = 1 << 22
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Class manager for gperf program Options. */
|
/* Class manager for gperf program Options. */
|
||||||
|
|||||||
111
src/search.cc
111
src/search.cc
@@ -27,6 +27,7 @@
|
|||||||
#include <stdlib.h> /* declares exit(), rand(), srand() */
|
#include <stdlib.h> /* declares exit(), rand(), srand() */
|
||||||
#include <string.h> /* declares memset(), memcmp() */
|
#include <string.h> /* declares memset(), memcmp() */
|
||||||
#include <time.h> /* declares time() */
|
#include <time.h> /* declares time() */
|
||||||
|
#include <math.h> /* declares exp() */
|
||||||
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */
|
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */
|
||||||
#include "options.h"
|
#include "options.h"
|
||||||
#include "hash-table.h"
|
#include "hash-table.h"
|
||||||
@@ -904,7 +905,7 @@ Search::compute_hash (KeywordExt *keyword) const
|
|||||||
const unsigned int *p = keyword->_selchars;
|
const unsigned int *p = keyword->_selchars;
|
||||||
int i = keyword->_selchars_length;
|
int i = keyword->_selchars_length;
|
||||||
for (; i > 0; p++, i--)
|
for (; i > 0; p++, i--)
|
||||||
sum += _asso_values[*p];
|
sum += _asso_values[*p];
|
||||||
|
|
||||||
return keyword->_hash_value = sum;
|
return keyword->_hash_value = sum;
|
||||||
}
|
}
|
||||||
@@ -962,20 +963,96 @@ compute_disjoint_union (const unsigned int *set_1, int size_1,
|
|||||||
/* Sorts the given set in increasing frequency of _occurrences[]. */
|
/* Sorts the given set in increasing frequency of _occurrences[]. */
|
||||||
|
|
||||||
inline void
|
inline void
|
||||||
Search::sort_by_occurrence (unsigned int *set, int len) const
|
Search::sort_by_occurrence (unsigned int *set, unsigned int len) const
|
||||||
{
|
{
|
||||||
/* Use bubble sort, since the set is typically short. */
|
/* Use bubble sort, since the set is typically short. */
|
||||||
for (int i = 1; i < len; i++)
|
for (unsigned int i = 1; i < len; i++)
|
||||||
{
|
{
|
||||||
int curr;
|
unsigned int j;
|
||||||
unsigned int tmp;
|
unsigned int tmp;
|
||||||
|
|
||||||
for (curr = i, tmp = set[curr];
|
for (j = i, tmp = set[j];
|
||||||
curr > 0 && _occurrences[tmp] < _occurrences[set[curr-1]];
|
j > 0 && _occurrences[tmp] < _occurrences[set[j-1]];
|
||||||
curr--)
|
j--)
|
||||||
set[curr] = set[curr - 1];
|
set[j] = set[j - 1];
|
||||||
|
|
||||||
set[curr] = tmp;
|
set[j] = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Computes the frequency of occurrence of a character among the keywords
|
||||||
|
up to the given keyword. */
|
||||||
|
inline unsigned int
|
||||||
|
Search::compute_occurrence (unsigned int c, KeywordExt *curr) const
|
||||||
|
{
|
||||||
|
unsigned int occurrence = 0;
|
||||||
|
|
||||||
|
for (KeywordExt_List *temp = _head; ; temp = temp->rest())
|
||||||
|
{
|
||||||
|
KeywordExt *keyword = temp->first();
|
||||||
|
|
||||||
|
int m = keyword->_selchars_length;
|
||||||
|
for (int i = 0; i < m; i++)
|
||||||
|
if (keyword->_selchars[i] == c)
|
||||||
|
{
|
||||||
|
occurrence++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keyword == curr)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return occurrence;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sorts the given set in increasing frequency of occurrences among the
|
||||||
|
keywords up to the given keyword. */
|
||||||
|
|
||||||
|
inline void
|
||||||
|
Search::sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const
|
||||||
|
{
|
||||||
|
unsigned int occurrences[len];
|
||||||
|
|
||||||
|
for (unsigned int j = 0; j < len; j++)
|
||||||
|
occurrences[j] = 0;
|
||||||
|
for (KeywordExt_List *temp = _head; ; temp = temp->rest())
|
||||||
|
{
|
||||||
|
KeywordExt *keyword = temp->first();
|
||||||
|
|
||||||
|
int m = keyword->_selchars_length;
|
||||||
|
for (unsigned int j = 0; j < len; j++)
|
||||||
|
{
|
||||||
|
unsigned int c = set[j];
|
||||||
|
|
||||||
|
for (int i = 0; i < m; i++)
|
||||||
|
if (keyword->_selchars[i] == c)
|
||||||
|
{
|
||||||
|
occurrences[j]++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keyword == curr)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Use bubble sort, since the set is typically short. */
|
||||||
|
for (unsigned int i = 1; i < len; i++)
|
||||||
|
{
|
||||||
|
unsigned int j;
|
||||||
|
unsigned int set_tmp, occ_tmp;
|
||||||
|
|
||||||
|
for (j = i, set_tmp = set[j], occ_tmp = occurrences[j];
|
||||||
|
j > 0 && occ_tmp < occurrences[j-1];
|
||||||
|
j--)
|
||||||
|
{
|
||||||
|
set[j] = set[j - 1];
|
||||||
|
occurrences[j] = occurrences[j - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
set[j] = set_tmp;
|
||||||
|
occurrences[j] = occ_tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1152,12 +1229,26 @@ Search::find_asso_values ()
|
|||||||
/* Sort by decreasing occurrence: Try least-used characters c first.
|
/* Sort by decreasing occurrence: Try least-used characters c first.
|
||||||
The idea is that this reduces the number of freshly introduced
|
The idea is that this reduces the number of freshly introduced
|
||||||
collisions. */
|
collisions. */
|
||||||
sort_by_occurrence (union_set, union_set_length);
|
if (option[OPT_CHOICE])
|
||||||
|
sort_by_occurrence (union_set, union_set_length, curr);
|
||||||
|
else
|
||||||
|
sort_by_occurrence (union_set, union_set_length);
|
||||||
|
|
||||||
for (union_index = 0; union_index < union_set_length; union_index++)
|
for (union_index = 0; union_index < union_set_length; union_index++)
|
||||||
{
|
{
|
||||||
c = union_set[union_index];
|
c = union_set[union_index];
|
||||||
|
|
||||||
|
if (option[DEBUG])
|
||||||
|
{
|
||||||
|
unsigned int n = sp - stack + 1;
|
||||||
|
unsigned int o = compute_occurrence (c, curr);
|
||||||
|
fprintf (stderr, "Expected number of iterations between %g and %g\n",
|
||||||
|
exp (static_cast<double>((2*n-o)*o)
|
||||||
|
/ static_cast<double>(2*_max_hash_value)),
|
||||||
|
exp (static_cast<double>((2*n-o)*o)
|
||||||
|
/ static_cast<double>(2*_asso_value_max)));
|
||||||
|
}
|
||||||
|
|
||||||
/* Try various other values for _asso_values[c]. A value is
|
/* Try various other values for _asso_values[c]. A value is
|
||||||
successful if, with it, the recomputed hash values for the
|
successful if, with it, the recomputed hash values for the
|
||||||
keywords from _head->first() to curr - inclusive - give no
|
keywords from _head->first() to curr - inclusive - give no
|
||||||
|
|||||||
@@ -90,8 +90,15 @@ private:
|
|||||||
and stores it in keyword->_hash_value. */
|
and stores it in keyword->_hash_value. */
|
||||||
int compute_hash (KeywordExt *keyword) const;
|
int compute_hash (KeywordExt *keyword) const;
|
||||||
|
|
||||||
|
/* Computes the frequency of occurrence of a character among the keywords
|
||||||
|
up to the given keyword. */
|
||||||
|
unsigned int compute_occurrence (unsigned int c, KeywordExt *curr) const;
|
||||||
|
|
||||||
/* Sorts the given set in increasing frequency of _occurrences[]. */
|
/* Sorts the given set in increasing frequency of _occurrences[]. */
|
||||||
void sort_by_occurrence (unsigned int *set, int len) const;
|
void sort_by_occurrence (unsigned int *set, unsigned int len) const;
|
||||||
|
/* Sorts the given set in increasing frequency of occurrences among the
|
||||||
|
keywords up to the given keyword. */
|
||||||
|
void sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const;
|
||||||
|
|
||||||
bool has_collisions (KeywordExt *curr);
|
bool has_collisions (KeywordExt *curr);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user