1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

New option --multiple-iterations.

This commit is contained in:
Bruno Haible
2003-01-15 13:01:25 +00:00
parent c67f999b54
commit c3467c5302
11 changed files with 207 additions and 10 deletions

View File

@@ -1,3 +1,26 @@
2002-11-04 Bruno Haible <bruno@clisp.org>
* src/options.h (Options::_asso_iterations): New field.
(Options::get_asso_iterations): New method declaration.
* src/options.icc (Options::get_asso_iterations): New method.
* src/options.cc (Options::short_usage): Mention j<jump> and m<num>.
(Options::long_usage): Document option -m.
(Options::Options): Initialize _asso_iterations.
(Options::~Options): Print _asso_iterations too.
(long_options): Add --multiple-iterations.
(Options::parse_options): Handle option -m.
* src/keyword-list.h (copy_list, delete_list): New declarations.
* src/keyword-list.cc (copy_list, delete_list): New functions.
* src/search.h (Search::_initial_asso_value, Search::_jump): New fields.
* src/search.cc (Search::prepare_asso_values): Initialize
_initial_asso_value and _jump here.
(Search::init_asso_values): Use _initial_asso_value.
(Search::try_asso_value): Use _jump.
(Search::optimize): If option -m was given, iterate over different
values for _initial_asso_value and _jump.
* doc/gperf.texi (Algorithmic Details): Document option -m.
* tests/test-6.exp: Update.
2002-11-03 Bruno Haible <bruno@clisp.org> 2002-11-03 Bruno Haible <bruno@clisp.org>
Bug fix: When option -j 0 was used without option -r, the output was Bug fix: When option -j 0 was used without option -r, the output was

5
NEWS
View File

@@ -1,3 +1,8 @@
New in 2.8:
* Added option -m/--multiple-iterations that reduces the size of the
generated table.
New in 2.7.2: New in 2.7.2:
* Keywords may now be enclosed in double quotes; this permits the use of * Keywords may now be enclosed in double quotes; this permits the use of

View File

@@ -546,7 +546,7 @@ Also, in this case the @samp{-c} option is ignored.
There are @emph{many} options to @code{gperf}. They were added to make There are @emph{many} options to @code{gperf}. They were added to make
the program more convenient for use with real applications. ``On-line'' the program more convenient for use with real applications. ``On-line''
help is readily available via the @samp{-h} option. Here is the help is readily available via the @samp{--help} option. Here is the
complete list of options. complete list of options.
@menu @menu
@@ -802,6 +802,12 @@ iterate when resolving a collision. `0' means iterate by the number of
keywords. This option is probably most useful when used in conjunction keywords. This option is probably most useful when used in conjunction
with options @samp{-D} and/or @samp{-S} for @emph{large} keyword sets. with options @samp{-D} and/or @samp{-S} for @emph{large} keyword sets.
@item -m @var{iterations}
@itemx --multiple-iterations=@var{iterations}
Perform multiple choices of the @samp{-i} and @samp{-j} values, and
choose the best results. This increases the running time by a factor of
@var{iterations} but does a good job minimizing the generated table size.
@item -i @var{initial-value} @item -i @var{initial-value}
@itemx --initial-asso=@var{initial-value} @itemx --initial-asso=@var{initial-value}
Provides an initial @var{value} for the associate values array. Default Provides an initial @var{value} for the associate values array. Default

View File

@@ -25,18 +25,60 @@
#include <stddef.h> #include <stddef.h>
/* -------------------------- Keyword_List class --------------------------- */
/* Constructor. */ /* Constructor. */
Keyword_List::Keyword_List (Keyword *car) Keyword_List::Keyword_List (Keyword *car)
: _cdr (NULL), _car (car) : _cdr (NULL), _car (car)
{ {
} }
/* ------------------------- KeywordExt_List class ------------------------- */
/* Unused constructor. */ /* Unused constructor. */
KeywordExt_List::KeywordExt_List (KeywordExt *car) KeywordExt_List::KeywordExt_List (KeywordExt *car)
: Keyword_List (car) : Keyword_List (car)
{ {
} }
/* ------------------------ Keyword_List functions ------------------------- */
/* Copies a linear list, sharing the list elements. */
Keyword_List *
copy_list (Keyword_List *list)
{
Keyword_List *result;
Keyword_List **lastp = &result;
while (list != NULL)
{
Keyword_List *new_cons = new Keyword_List (list->first());
*lastp = new_cons;
lastp = &new_cons->rest();
list = list->rest();
}
*lastp = NULL;
return result;
}
/* Copies a linear list, sharing the list elements. */
KeywordExt_List *
copy_list (KeywordExt_List *list)
{
return static_cast<KeywordExt_List *> (copy_list (static_cast<Keyword_List *> (list)));
}
/* Deletes a linear list, keeping the list elements in memory. */
void
delete_list (Keyword_List *list)
{
while (list != NULL)
{
Keyword_List *rest = list->rest();
delete list;
list = rest;
}
}
#ifndef __OPTIMIZE__ #ifndef __OPTIMIZE__

View File

@@ -57,6 +57,13 @@ public:
KeywordExt_List *& rest (); KeywordExt_List *& rest ();
}; };
/* Copies a linear list, sharing the list elements. */
extern Keyword_List * copy_list (Keyword_List *list);
extern KeywordExt_List * copy_list (KeywordExt_List *list);
/* Deletes a linear list, keeping the list elements in memory. */
extern void delete_list (Keyword_List *list);
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
#define INLINE inline #define INLINE inline

View File

@@ -65,7 +65,7 @@ static const char *const DEFAULT_DELIMITERS = ",\n";
void void
Options::short_usage (FILE * stream) const Options::short_usage (FILE * stream) const
{ {
fprintf (stream, "Usage: %s [-cCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n" fprintf (stream, "Usage: %s [-cCdDef[num]F<initializers>GhH<hashname>i<init>Ij<jump>k<keys>K<keyname>lL<language>m<num>nN<function name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
"Try '%s --help' for more information.\n", "Try '%s --help' for more information.\n",
program_name, program_name); program_name, program_name);
} }
@@ -192,6 +192,12 @@ Options::long_usage (FILE * stream) const
" argument represents the number of times to iterate\n" " argument represents the number of times to iterate\n"
" when resolving a collision. '0' means \"iterate by\n" " when resolving a collision. '0' means \"iterate by\n"
" the number of keywords\".\n"); " the number of keywords\".\n");
fprintf (stream,
" -m, --multiple-iterations=ITERATIONS\n"
" Perform multiple choices of the -i and -j values,\n"
" and choose the best results. This increases the\n"
" running time by a factor of ITERATIONS but does a\n"
" good job minimizing the generated table size.\n");
fprintf (stream, fprintf (stream,
" -i, --initial-asso=N Provide an initial value for the associate values\n" " -i, --initial-asso=N Provide an initial value for the associate values\n"
" array. Default is 0. Setting this value larger helps\n" " array. Default is 0. Setting this value larger helps\n"
@@ -413,6 +419,7 @@ Options::Options ()
_iterations (0), _iterations (0),
_jump (DEFAULT_JUMP_VALUE), _jump (DEFAULT_JUMP_VALUE),
_initial_asso_value (0), _initial_asso_value (0),
_asso_iterations (0),
_total_switches (1), _total_switches (1),
_size_multiple (1), _size_multiple (1),
_function_name (DEFAULT_NAME), _function_name (DEFAULT_NAME),
@@ -459,6 +466,7 @@ Options::~Options ()
"\nword list name = %s" "\nword list name = %s"
"\nkey name = %s" "\nkey name = %s"
"\ninitializer suffix = %s" "\ninitializer suffix = %s"
"\nasso_values iterations = %d"
"\njump value = %d" "\njump value = %d"
"\nhash table size multiplier = %d" "\nhash table size multiplier = %d"
"\ninitial associated value = %d" "\ninitial associated value = %d"
@@ -486,8 +494,8 @@ Options::~Options ()
_option_word & SEVENBIT ? "enabled" : "disabled", _option_word & SEVENBIT ? "enabled" : "disabled",
_iterations, _iterations,
_function_name, _hash_name, _wordlist_name, _key_name, _function_name, _hash_name, _wordlist_name, _key_name,
_initializer_suffix, _jump, _size_multiple, _initial_asso_value, _initializer_suffix, _asso_iterations, _jump, _size_multiple,
_delimiters, _total_switches); _initial_asso_value, _delimiters, _total_switches);
if (_option_word & ALLCHARS) if (_option_word & ALLCHARS)
fprintf (stderr, "all characters are used in the hash function\n"); fprintf (stderr, "all characters are used in the hash function\n");
else else
@@ -535,6 +543,7 @@ static const struct option long_options[] =
{ "fast", required_argument, NULL, 'f' }, { "fast", required_argument, NULL, 'f' },
{ "initial-asso", required_argument, NULL, 'i' }, { "initial-asso", required_argument, NULL, 'i' },
{ "jump", required_argument, NULL, 'j' }, { "jump", required_argument, NULL, 'j' },
{ "multiple-iterations", required_argument, NULL, 'm' },
{ "no-strlen", no_argument, NULL, 'n' }, { "no-strlen", no_argument, NULL, 'n' },
{ "occurrence-sort", no_argument, NULL, 'o' }, { "occurrence-sort", no_argument, NULL, 'o' },
{ "random", no_argument, NULL, 'r' }, { "random", no_argument, NULL, 'r' },
@@ -556,7 +565,7 @@ Options::parse_options (int argc, char *argv[])
while ((option_char = while ((option_char =
getopt_long (_argument_count, _argument_vector, getopt_long (_argument_count, _argument_vector,
"adcCDe:Ef:F:gGhH:i:Ij:k:K:lL:nN:oprs:S:tTvW:Z:7", "acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oprs:S:tTvW:Z:7",
long_options, NULL)) long_options, NULL))
!= -1) != -1)
{ {
@@ -738,6 +747,15 @@ Options::parse_options (int argc, char *argv[])
} }
break; break;
} }
case 'm': /* Multiple iterations for finding good asso_values. */
{
if ((_asso_iterations = atoi (/*getopt*/optarg)) < 0)
{
fprintf (stderr, "asso_iterations value must not be negative, assuming 0\n");
_asso_iterations = 0;
}
break;
}
case 'n': /* Don't include the length when computing hash function. */ case 'n': /* Don't include the length when computing hash function. */
{ {
_option_word |= NOLENGTH; _option_word |= NOLENGTH;

View File

@@ -190,6 +190,9 @@ public:
/* Returns the initial associated character value. */ /* Returns the initial associated character value. */
int get_initial_asso_value () const; int get_initial_asso_value () const;
/* Returns the number of iterations for finding good asso_values. */
int get_asso_iterations () const;
/* Returns the total number of switch statements to generate. */ /* Returns the total number of switch statements to generate. */
int get_total_switches () const; int get_total_switches () const;
@@ -250,6 +253,9 @@ private:
/* Initial value for asso_values table. */ /* Initial value for asso_values table. */
int _initial_asso_value; int _initial_asso_value;
/* Number of attempts at finding good asso_values. */
int _asso_iterations;
/* Number of switch statements to generate. */ /* Number of switch statements to generate. */
int _total_switches; int _total_switches;

View File

@@ -146,6 +146,13 @@ Options::get_initial_asso_value () const
return _initial_asso_value; return _initial_asso_value;
} }
/* Returns the number of iterations for finding finding good asso_values. */
INLINE int
Options::get_asso_iterations () const
{
return _asso_iterations;
}
/* Returns the total number of switch statements to generate. */ /* Returns the total number of switch statements to generate. */
INLINE int INLINE int
Options::get_total_switches () const Options::get_total_switches () const

View File

@@ -434,6 +434,9 @@ Search::prepare_asso_values ()
if (option[RANDOM] || option.get_jump () == 0) if (option[RANDOM] || option.get_jump () == 0)
/* We will use rand(), so initialize the random number generator. */ /* We will use rand(), so initialize the random number generator. */
srand (reinterpret_cast<long>(time (0))); srand (reinterpret_cast<long>(time (0)));
_initial_asso_value = (option[RANDOM] ? -1 : option.get_initial_asso_value ());
_jump = option.get_jump ();
} }
/* Puts a first guess into asso_values[]. */ /* Puts a first guess into asso_values[]. */
@@ -441,14 +444,14 @@ Search::prepare_asso_values ()
void void
Search::init_asso_values () Search::init_asso_values ()
{ {
if (option[RANDOM]) if (_initial_asso_value < 0)
{ {
for (int i = 0; i < _alpha_size; i++) for (int i = 0; i < _alpha_size; i++)
_asso_values[i] = rand () & (_asso_value_max - 1); _asso_values[i] = rand () & (_asso_value_max - 1);
} }
else else
{ {
int asso_value = option.get_initial_asso_value (); int asso_value = _initial_asso_value;
asso_value = asso_value & (_asso_value_max - 1); asso_value = asso_value & (_asso_value_max - 1);
for (int i = 0; i < _alpha_size; i++) for (int i = 0; i < _alpha_size; i++)
@@ -565,7 +568,7 @@ Search::try_asso_value (unsigned char c, KeywordExt *curr, int iterations)
/* Try next value. Wrap around mod _asso_value_max. */ /* Try next value. Wrap around mod _asso_value_max. */
_asso_values[c] = _asso_values[c] =
(_asso_values[c] + (option.get_jump () ? option.get_jump () : rand ())) (_asso_values[c] + (_jump != 0 ? _jump : rand ()))
& (_asso_value_max - 1); & (_asso_value_max - 1);
/* Iteration Number array is a win, O(1) intialization time! */ /* Iteration Number array is a win, O(1) intialization time! */
@@ -733,7 +736,77 @@ Search::optimize ()
prepare_asso_values (); prepare_asso_values ();
/* Search for good _asso_values[]. */ /* Search for good _asso_values[]. */
find_asso_values (); int asso_iteration;
if ((asso_iteration = option.get_asso_iterations ()) == 0)
/* Try only the given _initial_asso_value and _jump. */
find_asso_values ();
else
{
/* Try different pairs of _initial_asso_value and _jump, in the
following order:
(0, 1)
(1, 1)
(2, 1) (0, 3)
(3, 1) (1, 3)
(4, 1) (2, 3) (0, 5)
(5, 1) (3, 3) (1, 5)
..... */
KeywordExt_List *saved_head = _head;
int best_initial_asso_value = 0;
int best_jump = 1;
int *best_asso_values = new int[_alpha_size];
int best_collisions = INT_MAX;
int best_max_hash_value = INT_MAX;
_initial_asso_value = 0; _jump = 1;
for (;;)
{
/* Restore the keyword list in its original order. */
_head = copy_list (saved_head);
/* Find good _asso_values[]. */
find_asso_values ();
/* Test whether it is the best solution so far. */
int collisions = 0;
int max_hash_value = INT_MIN;
_collision_detector->clear ();
for (KeywordExt_List *ptr = _head; ptr; ptr = ptr->rest())
{
KeywordExt *keyword = ptr->first();
int hashcode = compute_hash (keyword);
if (max_hash_value < hashcode)
max_hash_value = hashcode;
if (_collision_detector->set_bit (hashcode))
collisions++;
}
if (collisions < best_collisions
|| (collisions == best_collisions
&& max_hash_value < best_max_hash_value))
{
memcpy (best_asso_values, _asso_values,
_alpha_size * sizeof (_asso_values[0]));
best_collisions = collisions;
best_max_hash_value = max_hash_value;
}
/* Delete the copied keyword list. */
delete_list (_head);
if (--asso_iteration == 0)
break;
/* Prepare for next iteration. */
if (_initial_asso_value >= 2)
_initial_asso_value -= 2, _jump += 2;
else
_initial_asso_value += _jump, _jump = 1;
}
_head = saved_head;
/* Install the best found asso_values. */
_initial_asso_value = best_initial_asso_value;
_jump = best_jump;
memcpy (_asso_values, best_asso_values,
_alpha_size * sizeof (_asso_values[0]));
delete[] best_asso_values;
/* The keywords' _hash_value fields are recomputed below. */
}
/* Make one final check, just to make sure nothing weird happened.... */ /* Make one final check, just to make sure nothing weird happened.... */
_collision_detector->clear (); _collision_detector->clear ();
@@ -749,7 +822,7 @@ Search::optimize ()
{ {
fprintf (stderr, fprintf (stderr,
"\nInternal error, duplicate value %d:\n" "\nInternal error, duplicate value %d:\n"
"try options -D or -r, or use new key positions.\n\n", "try options -D or -m or -r, or use new key positions.\n\n",
hashcode); hashcode);
exit (1); exit (1);
} }

View File

@@ -132,6 +132,11 @@ private:
/* Exclusive upper bound for every _asso_values[c]. A power of 2. */ /* Exclusive upper bound for every _asso_values[c]. A power of 2. */
int _asso_value_max; int _asso_value_max;
/* Initial value for asso_values table. -1 means random. */
int _initial_asso_value;
/* Jump length when trying alternative values. 0 means random. */
int _jump;
/* Maximal possible hash value. */ /* Maximal possible hash value. */
int _max_hash_value; int _max_hash_value;

View File

@@ -87,6 +87,11 @@ Algorithm employed by gperf:
argument represents the number of times to iterate argument represents the number of times to iterate
when resolving a collision. '0' means "iterate by when resolving a collision. '0' means "iterate by
the number of keywords". the number of keywords".
-m, --multiple-iterations=ITERATIONS
Perform multiple choices of the -i and -j values,
and choose the best results. This increases the
running time by a factor of ITERATIONS but does a
good job minimizing the generated table size.
-i, --initial-asso=N Provide an initial value for the associate values -i, --initial-asso=N Provide an initial value for the associate values
array. Default is 0. Setting this value larger helps array. Default is 0. Setting this value larger helps
inflate the size of the final table. inflate the size of the final table.