1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

New option --multiple-iterations.

This commit is contained in:
Bruno Haible
2003-01-15 13:01:25 +00:00
parent c67f999b54
commit c3467c5302
11 changed files with 207 additions and 10 deletions

View File

@@ -1,3 +1,26 @@
2002-11-04 Bruno Haible <bruno@clisp.org>
* src/options.h (Options::_asso_iterations): New field.
(Options::get_asso_iterations): New method declaration.
* src/options.icc (Options::get_asso_iterations): New method.
* src/options.cc (Options::short_usage): Mention j<jump> and m<num>.
(Options::long_usage): Document option -m.
(Options::Options): Initialize _asso_iterations.
(Options::~Options): Print _asso_iterations too.
(long_options): Add --multiple-iterations.
(Options::parse_options): Handle option -m.
* src/keyword-list.h (copy_list, delete_list): New declarations.
* src/keyword-list.cc (copy_list, delete_list): New functions.
* src/search.h (Search::_initial_asso_value, Search::_jump): New fields.
* src/search.cc (Search::prepare_asso_values): Initialize
_initial_asso_value and _jump here.
(Search::init_asso_values): Use _initial_asso_value.
(Search::try_asso_value): Use _jump.
(Search::optimize): If option -m was given, iterate over different
values for _initial_asso_value and _jump.
* doc/gperf.texi (Algorithmic Details): Document option -m.
* tests/test-6.exp: Update.
2002-11-03 Bruno Haible <bruno@clisp.org>
Bug fix: When option -j 0 was used without option -r, the output was

5
NEWS
View File

@@ -1,3 +1,8 @@
New in 2.8:
* Added option -m/--multiple-iterations that reduces the size of the
generated table.
New in 2.7.2:
* Keywords may now be enclosed in double quotes; this permits the use of

View File

@@ -546,7 +546,7 @@ Also, in this case the @samp{-c} option is ignored.
There are @emph{many} options to @code{gperf}. They were added to make
the program more convenient for use with real applications. ``On-line''
help is readily available via the @samp{-h} option. Here is the
help is readily available via the @samp{--help} option. Here is the
complete list of options.
@menu
@@ -802,6 +802,12 @@ iterate when resolving a collision. `0' means iterate by the number of
keywords. This option is probably most useful when used in conjunction
with options @samp{-D} and/or @samp{-S} for @emph{large} keyword sets.
@item -m @var{iterations}
@itemx --multiple-iterations=@var{iterations}
Perform multiple choices of the @samp{-i} and @samp{-j} values, and
choose the best results. This increases the running time by a factor of
@var{iterations} but does a good job minimizing the generated table size.
@item -i @var{initial-value}
@itemx --initial-asso=@var{initial-value}
Provides an initial @var{value} for the associate values array. Default

View File

@@ -25,18 +25,60 @@
#include <stddef.h>
/* -------------------------- Keyword_List class --------------------------- */
/* Constructor. */
Keyword_List::Keyword_List (Keyword *car)
: _cdr (NULL), _car (car)
{
}
/* ------------------------- KeywordExt_List class ------------------------- */
/* Unused constructor. */
KeywordExt_List::KeywordExt_List (KeywordExt *car)
: Keyword_List (car)
{
}
/* ------------------------ Keyword_List functions ------------------------- */
/* Copies a linear list, sharing the list elements. */
Keyword_List *
copy_list (Keyword_List *list)
{
Keyword_List *result;
Keyword_List **lastp = &result;
while (list != NULL)
{
Keyword_List *new_cons = new Keyword_List (list->first());
*lastp = new_cons;
lastp = &new_cons->rest();
list = list->rest();
}
*lastp = NULL;
return result;
}
/* Copies a linear list, sharing the list elements. */
KeywordExt_List *
copy_list (KeywordExt_List *list)
{
return static_cast<KeywordExt_List *> (copy_list (static_cast<Keyword_List *> (list)));
}
/* Deletes a linear list, keeping the list elements in memory. */
void
delete_list (Keyword_List *list)
{
while (list != NULL)
{
Keyword_List *rest = list->rest();
delete list;
list = rest;
}
}
#ifndef __OPTIMIZE__

View File

@@ -57,6 +57,13 @@ public:
KeywordExt_List *& rest ();
};
/* Copies a linear list, sharing the list elements. */
extern Keyword_List * copy_list (Keyword_List *list);
extern KeywordExt_List * copy_list (KeywordExt_List *list);
/* Deletes a linear list, keeping the list elements in memory. */
extern void delete_list (Keyword_List *list);
#ifdef __OPTIMIZE__
#define INLINE inline

View File

@@ -65,7 +65,7 @@ static const char *const DEFAULT_DELIMITERS = ",\n";
void
Options::short_usage (FILE * stream) const
{
fprintf (stream, "Usage: %s [-cCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
fprintf (stream, "Usage: %s [-cCdDef[num]F<initializers>GhH<hashname>i<init>Ij<jump>k<keys>K<keyname>lL<language>m<num>nN<function name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
"Try '%s --help' for more information.\n",
program_name, program_name);
}
@@ -192,6 +192,12 @@ Options::long_usage (FILE * stream) const
" argument represents the number of times to iterate\n"
" when resolving a collision. '0' means \"iterate by\n"
" the number of keywords\".\n");
fprintf (stream,
" -m, --multiple-iterations=ITERATIONS\n"
" Perform multiple choices of the -i and -j values,\n"
" and choose the best results. This increases the\n"
" running time by a factor of ITERATIONS but does a\n"
" good job minimizing the generated table size.\n");
fprintf (stream,
" -i, --initial-asso=N Provide an initial value for the associate values\n"
" array. Default is 0. Setting this value larger helps\n"
@@ -413,6 +419,7 @@ Options::Options ()
_iterations (0),
_jump (DEFAULT_JUMP_VALUE),
_initial_asso_value (0),
_asso_iterations (0),
_total_switches (1),
_size_multiple (1),
_function_name (DEFAULT_NAME),
@@ -459,6 +466,7 @@ Options::~Options ()
"\nword list name = %s"
"\nkey name = %s"
"\ninitializer suffix = %s"
"\nasso_values iterations = %d"
"\njump value = %d"
"\nhash table size multiplier = %d"
"\ninitial associated value = %d"
@@ -486,8 +494,8 @@ Options::~Options ()
_option_word & SEVENBIT ? "enabled" : "disabled",
_iterations,
_function_name, _hash_name, _wordlist_name, _key_name,
_initializer_suffix, _jump, _size_multiple, _initial_asso_value,
_delimiters, _total_switches);
_initializer_suffix, _asso_iterations, _jump, _size_multiple,
_initial_asso_value, _delimiters, _total_switches);
if (_option_word & ALLCHARS)
fprintf (stderr, "all characters are used in the hash function\n");
else
@@ -535,6 +543,7 @@ static const struct option long_options[] =
{ "fast", required_argument, NULL, 'f' },
{ "initial-asso", required_argument, NULL, 'i' },
{ "jump", required_argument, NULL, 'j' },
{ "multiple-iterations", required_argument, NULL, 'm' },
{ "no-strlen", no_argument, NULL, 'n' },
{ "occurrence-sort", no_argument, NULL, 'o' },
{ "random", no_argument, NULL, 'r' },
@@ -556,7 +565,7 @@ Options::parse_options (int argc, char *argv[])
while ((option_char =
getopt_long (_argument_count, _argument_vector,
"adcCDe:Ef:F:gGhH:i:Ij:k:K:lL:nN:oprs:S:tTvW:Z:7",
"acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oprs:S:tTvW:Z:7",
long_options, NULL))
!= -1)
{
@@ -738,6 +747,15 @@ Options::parse_options (int argc, char *argv[])
}
break;
}
case 'm': /* Multiple iterations for finding good asso_values. */
{
if ((_asso_iterations = atoi (/*getopt*/optarg)) < 0)
{
fprintf (stderr, "asso_iterations value must not be negative, assuming 0\n");
_asso_iterations = 0;
}
break;
}
case 'n': /* Don't include the length when computing hash function. */
{
_option_word |= NOLENGTH;

View File

@@ -190,6 +190,9 @@ public:
/* Returns the initial associated character value. */
int get_initial_asso_value () const;
/* Returns the number of iterations for finding good asso_values. */
int get_asso_iterations () const;
/* Returns the total number of switch statements to generate. */
int get_total_switches () const;
@@ -250,6 +253,9 @@ private:
/* Initial value for asso_values table. */
int _initial_asso_value;
/* Number of attempts at finding good asso_values. */
int _asso_iterations;
/* Number of switch statements to generate. */
int _total_switches;

View File

@@ -146,6 +146,13 @@ Options::get_initial_asso_value () const
return _initial_asso_value;
}
/* Returns the number of iterations for finding finding good asso_values. */
INLINE int
Options::get_asso_iterations () const
{
return _asso_iterations;
}
/* Returns the total number of switch statements to generate. */
INLINE int
Options::get_total_switches () const

View File

@@ -434,6 +434,9 @@ Search::prepare_asso_values ()
if (option[RANDOM] || option.get_jump () == 0)
/* We will use rand(), so initialize the random number generator. */
srand (reinterpret_cast<long>(time (0)));
_initial_asso_value = (option[RANDOM] ? -1 : option.get_initial_asso_value ());
_jump = option.get_jump ();
}
/* Puts a first guess into asso_values[]. */
@@ -441,14 +444,14 @@ Search::prepare_asso_values ()
void
Search::init_asso_values ()
{
if (option[RANDOM])
if (_initial_asso_value < 0)
{
for (int i = 0; i < _alpha_size; i++)
_asso_values[i] = rand () & (_asso_value_max - 1);
}
else
{
int asso_value = option.get_initial_asso_value ();
int asso_value = _initial_asso_value;
asso_value = asso_value & (_asso_value_max - 1);
for (int i = 0; i < _alpha_size; i++)
@@ -565,7 +568,7 @@ Search::try_asso_value (unsigned char c, KeywordExt *curr, int iterations)
/* Try next value. Wrap around mod _asso_value_max. */
_asso_values[c] =
(_asso_values[c] + (option.get_jump () ? option.get_jump () : rand ()))
(_asso_values[c] + (_jump != 0 ? _jump : rand ()))
& (_asso_value_max - 1);
/* Iteration Number array is a win, O(1) intialization time! */
@@ -733,7 +736,77 @@ Search::optimize ()
prepare_asso_values ();
/* Search for good _asso_values[]. */
int asso_iteration;
if ((asso_iteration = option.get_asso_iterations ()) == 0)
/* Try only the given _initial_asso_value and _jump. */
find_asso_values ();
else
{
/* Try different pairs of _initial_asso_value and _jump, in the
following order:
(0, 1)
(1, 1)
(2, 1) (0, 3)
(3, 1) (1, 3)
(4, 1) (2, 3) (0, 5)
(5, 1) (3, 3) (1, 5)
..... */
KeywordExt_List *saved_head = _head;
int best_initial_asso_value = 0;
int best_jump = 1;
int *best_asso_values = new int[_alpha_size];
int best_collisions = INT_MAX;
int best_max_hash_value = INT_MAX;
_initial_asso_value = 0; _jump = 1;
for (;;)
{
/* Restore the keyword list in its original order. */
_head = copy_list (saved_head);
/* Find good _asso_values[]. */
find_asso_values ();
/* Test whether it is the best solution so far. */
int collisions = 0;
int max_hash_value = INT_MIN;
_collision_detector->clear ();
for (KeywordExt_List *ptr = _head; ptr; ptr = ptr->rest())
{
KeywordExt *keyword = ptr->first();
int hashcode = compute_hash (keyword);
if (max_hash_value < hashcode)
max_hash_value = hashcode;
if (_collision_detector->set_bit (hashcode))
collisions++;
}
if (collisions < best_collisions
|| (collisions == best_collisions
&& max_hash_value < best_max_hash_value))
{
memcpy (best_asso_values, _asso_values,
_alpha_size * sizeof (_asso_values[0]));
best_collisions = collisions;
best_max_hash_value = max_hash_value;
}
/* Delete the copied keyword list. */
delete_list (_head);
if (--asso_iteration == 0)
break;
/* Prepare for next iteration. */
if (_initial_asso_value >= 2)
_initial_asso_value -= 2, _jump += 2;
else
_initial_asso_value += _jump, _jump = 1;
}
_head = saved_head;
/* Install the best found asso_values. */
_initial_asso_value = best_initial_asso_value;
_jump = best_jump;
memcpy (_asso_values, best_asso_values,
_alpha_size * sizeof (_asso_values[0]));
delete[] best_asso_values;
/* The keywords' _hash_value fields are recomputed below. */
}
/* Make one final check, just to make sure nothing weird happened.... */
_collision_detector->clear ();
@@ -749,7 +822,7 @@ Search::optimize ()
{
fprintf (stderr,
"\nInternal error, duplicate value %d:\n"
"try options -D or -r, or use new key positions.\n\n",
"try options -D or -m or -r, or use new key positions.\n\n",
hashcode);
exit (1);
}

View File

@@ -132,6 +132,11 @@ private:
/* Exclusive upper bound for every _asso_values[c]. A power of 2. */
int _asso_value_max;
/* Initial value for asso_values table. -1 means random. */
int _initial_asso_value;
/* Jump length when trying alternative values. 0 means random. */
int _jump;
/* Maximal possible hash value. */
int _max_hash_value;

View File

@@ -87,6 +87,11 @@ Algorithm employed by gperf:
argument represents the number of times to iterate
when resolving a collision. '0' means "iterate by
the number of keywords".
-m, --multiple-iterations=ITERATIONS
Perform multiple choices of the -i and -j values,
and choose the best results. This increases the
running time by a factor of ITERATIONS but does a
good job minimizing the generated table size.
-i, --initial-asso=N Provide an initial value for the associate values
array. Default is 0. Setting this value larger helps
inflate the size of the final table.