1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Rework the options handling.

This commit is contained in:
Bruno Haible
2002-11-13 18:18:48 +00:00
parent 5e5d12ca2d
commit c8f007fe8b
12 changed files with 739 additions and 439 deletions

View File

@@ -43,17 +43,17 @@ Gen_Perf::Gen_Perf ()
Key_List::read_keys ();
if (option[ORDER])
reorder ();
asso_value_max = option.get_asso_max ();
non_linked_length = Key_List::keyword_list_length ();
_num_done = 1;
_fewest_collisions = 0;
asso_value_max = option.get_size_multiple ();
non_linked_length = Key_List::keyword_list_length ();
if (asso_value_max == 0)
asso_value_max = non_linked_length;
else if (asso_value_max > 0)
asso_value_max *= non_linked_length;
else /* if (asso_value_max < 0) */
asso_value_max = non_linked_length / -asso_value_max;
option.set_asso_max (POW (asso_value_max));
set_asso_max (POW (asso_value_max));
if (option[RANDOM])
{
@@ -64,14 +64,14 @@ Gen_Perf::Gen_Perf ()
}
else
{
int asso_value = option.initial_value ();
int asso_value = option.get_initial_asso_value ();
if (asso_value) /* Initialize array if user requests non-zero default. */
for (int i = ALPHA_SIZE - 1; i >= 0; i--)
_asso_values[i] = asso_value & option.get_asso_max () - 1;
_asso_values[i] = asso_value & get_asso_max () - 1;
}
_max_hash_value = Key_List::max_key_length () + option.get_asso_max () *
option.get_max_keysig_size ();
_max_hash_value = Key_List::max_key_length () + get_asso_max () *
get_max_keysig_size ();
_collision_detector = new Bool_Array (_max_hash_value + 1);
if (option[DEBUG])
@@ -172,7 +172,7 @@ Gen_Perf::affects_prev (char c, KeywordExt *curr)
{
int original_char = _asso_values[(unsigned char)c];
int total_iterations = !option[FAST]
? option.get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length ();
? get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length ();
/* Try all legal associated values. */
@@ -182,7 +182,7 @@ Gen_Perf::affects_prev (char c, KeywordExt *curr)
_asso_values[(unsigned char)c] =
(_asso_values[(unsigned char)c] + (option.get_jump () ? option.get_jump () : rand ()))
& (option.get_asso_max () - 1);
& (get_asso_max () - 1);
/* Iteration Number array is a win, O(1) initialization time! */
_collision_detector->clear ();
@@ -221,7 +221,7 @@ Gen_Perf::change (KeywordExt *prior, KeywordExt *curr)
int union_set_length;
if (!union_set)
union_set = new char [2 * option.get_max_keysig_size ()];
union_set = new char [2 * get_max_keysig_size ()];
if (option[DEBUG])
{
@@ -265,7 +265,7 @@ Gen_Perf::change (KeywordExt *prior, KeywordExt *curr)
if (option[DEBUG])
{
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
!option[FAST] ? option.get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length (),
!option[FAST] ? get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length (),
_fewest_collisions + _total_duplicates);
fflush (stderr);
}

View File

@@ -44,7 +44,18 @@ Hash_Table::~Hash_Table ()
{
if (option[DEBUG])
{
int field_width = option.get_max_keysig_size ();
int field_width;
if (option[ALLCHARS])
{
field_width = 0;
for (int i = _size - 1; i >= 0; i--)
if (_table[i])
if (field_width < _table[i]->_selchars_length)
field_width = _table[i]->_selchars_length;
}
else
field_width = option.get_max_keysig_size ();
fprintf (stderr,
"\ndumping the hash table\n"

View File

@@ -449,8 +449,6 @@ Key_List::read_keys ()
fprintf (stderr, "Empty input key is not allowed.\nTo recognize an empty input key, your code should check for\nlen == 0 before calling the gperf generated lookup function.\n");
exit (1);
}
if (option[ALLCHARS])
option.set_keysig_size (_max_key_len);
}
}
@@ -624,7 +622,7 @@ Key_List::sort ()
void
Key_List::dump ()
{
int field_width = option.get_max_keysig_size ();
int field_width = get_max_keysig_size ();
fprintf (stderr, "\nList contents are:\n(hash value, key length, index, %*s, keyword):\n",
field_width, "selchars");
@@ -667,3 +665,10 @@ Key_List::max_key_length ()
return _max_key_len;
}
/* Returns the number of key positions.
   When the ALLCHARS option is set (all characters are used in the hash
   function) this is _max_key_len; otherwise it is whatever
   option.get_max_keysig_size () reports.  */
int
Key_List::get_max_keysig_size ()
{
  if (option[ALLCHARS])
    return _max_key_len;

  return option.get_max_keysig_size ();
}

View File

@@ -51,6 +51,7 @@ private:
int _list_len; /* Length of head's Key_List, not counting duplicates. */
protected:
int _total_keys; /* Total number of keys, counting duplicates. */
int _size; /* Range of the hash table. */
private:
static int _determined[MAX_ALPHA_SIZE]; /* Used in function reorder, below. */
static int get_occurrence (KeywordExt *ptr);
@@ -79,6 +80,9 @@ public:
void reorder ();
void sort ();
void read_keys ();
int get_max_keysig_size ();
/* Stores R in _size; get_asso_max () hands the same value back. */
void set_asso_max (int r) { _size = r; }
/* Returns the current value of _size (the member written by
   set_asso_max ()). */
int get_asso_max () { return _size; }
};
#endif

View File

@@ -65,21 +65,21 @@ void KeywordExt::init_selchars (Vectors *v)
char *key_set =
new char[(option[ALLCHARS] ? _allchars_length : option.get_max_keysig_size ())];
char *ptr = key_set;
int i;
if (option[ALLCHARS])
/* Use all the character positions in the KEY. */
for (i = _allchars_length; i > 0; k++, ptr++, i--)
for (int i = _allchars_length; i > 0; k++, ptr++, i--)
v->_occurrences[(unsigned char)(*ptr = *k)]++;
else
/* Only use those character positions specified by the user. */
{
/* Iterate through the list of key_positions, initializing occurrences
table and selchars (via char * pointer ptr). */
PositionIterator iter (option.get_key_positions ());
for (option.reset (); (i = option.get ()) != EOS; )
for (int i; (i = iter.next ()) != PositionIterator::EOS; )
{
if (i == WORD_END)
if (i == Positions::LASTCHAR)
/* Special notation for last KEY position, i.e. '$'. */
*ptr = _allchars[_allchars_length - 1];
else if (i <= _allchars_length)

View File

@@ -29,7 +29,7 @@ int
main (int argc, char *argv[])
{
/* Sets the Options. */
option (argc, argv);
option.parse_options (argc, argv);
/* Initializes the key word list. */
Gen_Perf generate_table;

View File

@@ -27,122 +27,125 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
#include "vectors.h"
#include "version.h"
/* Global option coordinator for the entire program. */
/* Global option coordinator for the entire program. */
Options option;
/* Records the program name. */
/* Records the program name. */
const char *program_name;
/* Size to jump on a collision. */
/* Size to jump on a collision. */
static const int DEFAULT_JUMP_VALUE = 5;
/* Default name for generated lookup function. */
/* Default name for generated lookup function. */
static const char *const DEFAULT_NAME = "in_word_set";
/* Default name for the key component. */
/* Default name for the key component. */
static const char *const DEFAULT_KEY = "name";
/* Default struct initializer suffix. */
/* Default struct initializer suffix. */
static const char *const DEFAULT_INITIALIZER_SUFFIX = "";
/* Default name for the generated class. */
/* Default name for the generated class. */
static const char *const DEFAULT_CLASS_NAME = "Perfect_Hash";
/* Default name for generated hash function. */
/* Default name for generated hash function. */
static const char *const DEFAULT_HASH_NAME = "hash";
/* Default name for generated hash table array. */
/* Default name for generated hash table array. */
static const char *const DEFAULT_WORDLIST_NAME = "wordlist";
/* Default delimiters that separate keywords from their attributes. */
/* Default delimiters that separate keywords from their attributes. */
static const char *const DEFAULT_DELIMITERS = ",\n";
int Options::_option_word;
int Options::_total_switches;
int Options::_total_keysig_size;
int Options::_size;
int Options::_key_pos;
int Options::_jump;
int Options::_initial_asso_value;
int Options::_argument_count;
int Options::_iterations;
char **Options::_argument_vector;
const char *Options::_function_name;
const char *Options::_key_name;
const char *Options::_initializer_suffix;
const char *Options::_class_name;
const char *Options::_hash_name;
const char *Options::_wordlist_name;
const char *Options::_delimiters;
char Options::_key_positions[MAX_KEY_POS];
/* Prints program usage to given stream. */
void
Options::short_usage (FILE * strm)
Options::short_usage (FILE * stream) const
{
fprintf (strm, "Usage: %s [-cCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
"Try `%s --help' for more information.\n",
program_name, program_name);
fprintf (stream, "Usage: %s [-cCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
"Try '%s --help' for more information.\n",
program_name, program_name);
}
void
Options::long_usage (FILE * strm)
Options::long_usage (FILE * stream) const
{
fprintf (strm,
"GNU `gperf' generates perfect hash functions.\n"
"\n"
"Usage: %s [OPTION]... [INPUT-FILE]\n"
"\n"
fprintf (stream,
"GNU 'gperf' generates perfect hash functions.\n");
fprintf (stream, "\n");
fprintf (stream,
"Usage: %s [OPTION]... [INPUT-FILE]\n",
program_name);
fprintf (stream, "\n");
fprintf (stream,
"If a long option shows an argument as mandatory, then it is mandatory\n"
"for the equivalent short option also.\n"
"\n"
"Input file interpretation:\n"
"for the equivalent short option also.\n");
fprintf (stream, "\n");
fprintf (stream,
"Input file interpretation:\n");
fprintf (stream,
" -e, --delimiters=DELIMITER-LIST\n"
" Allow user to provide a string containing delimiters\n"
" used to separate keywords from their attributes.\n"
" Default is \",\\n\".\n"
" Default is \",\\n\".\n");
fprintf (stream,
" -t, --struct-type Allows the user to include a structured type\n"
" declaration for generated code. Any text before %%%%\n"
" is considered part of the type declaration. Key\n"
" words and additional fields may follow this, one\n"
" group of fields per line.\n"
"\n"
"Language for the output code:\n"
" group of fields per line.\n");
fprintf (stream, "\n");
fprintf (stream,
"Language for the output code:\n");
fprintf (stream,
" -L, --language=LANGUAGE-NAME\n"
" Generates code in the specified language. Languages\n"
" handled are currently C++, ANSI-C, C, and KR-C. The\n"
" default is C.\n"
"\n"
"Details in the output code:\n"
" default is C.\n");
fprintf (stream, "\n");
fprintf (stream,
"Details in the output code:\n");
fprintf (stream,
" -K, --slot-name=NAME Select name of the keyword component in the keyword\n"
" structure.\n"
" structure.\n");
fprintf (stream,
" -F, --initializer-suffix=INITIALIZERS\n"
" Initializers for additional components in the keyword\n"
" structure.\n"
" structure.\n");
fprintf (stream,
" -H, --hash-fn-name=NAME\n"
" Specify name of generated hash function. Default is\n"
" `hash'.\n"
" 'hash'.\n");
fprintf (stream,
" -N, --lookup-fn-name=NAME\n"
" Specify name of generated lookup function. Default\n"
" name is `in_word_set'.\n"
" name is 'in_word_set'.\n");
fprintf (stream,
" -Z, --class-name=NAME Specify name of generated C++ class. Default name is\n"
" `Perfect_Hash'.\n"
" -7, --seven-bit Assume 7-bit characters.\n"
" 'Perfect_Hash'.\n");
fprintf (stream,
" -7, --seven-bit Assume 7-bit characters.\n");
fprintf (stream,
" -c, --compare-strncmp Generate comparison code using strncmp rather than\n"
" strcmp.\n"
" strcmp.\n");
fprintf (stream,
" -C, --readonly-tables Make the contents of generated lookup tables\n"
" constant, i.e., readonly.\n"
" constant, i.e., readonly.\n");
fprintf (stream,
" -E, --enum Define constant values using an enum local to the\n"
" lookup function rather than with defines.\n"
" lookup function rather than with defines.\n");
fprintf (stream,
" -I, --includes Include the necessary system include file <string.h>\n"
" at the beginning of the code.\n"
" at the beginning of the code.\n");
fprintf (stream,
" -G, --global Generate the static table of keywords as a static\n"
" global variable, rather than hiding it inside of the\n"
" lookup function (which is the default behavior).\n"
" lookup function (which is the default behavior).\n");
fprintf (stream,
" -W, --word-array-name=NAME\n"
" Specify name of word list array. Default name is\n"
" `wordlist'.\n"
" 'wordlist'.\n");
fprintf (stream,
" -S, --switch=COUNT Causes the generated C code to use a switch\n"
" statement scheme, rather than an array lookup table.\n"
" This can lead to a reduction in both time and space\n"
@@ -152,13 +155,16 @@ Options::long_usage (FILE * strm)
" elements, a value of 2 generates 2 tables with 1/2\n"
" the elements in each table, etc. If COUNT is very\n"
" large, say 1000000, the generated C code does a\n"
" binary search.\n"
" binary search.\n");
fprintf (stream,
" -T, --omit-struct-type\n"
" Prevents the transfer of the type declaration to the\n"
" output file. Use this option if the type is already\n"
" defined elsewhere.\n"
"\n"
"Algorithm employed by gperf:\n"
" defined elsewhere.\n");
fprintf (stream, "\n");
fprintf (stream,
"Algorithm employed by gperf:\n");
fprintf (stream,
" -k, --key-positions=KEYS\n"
" Select the key positions used in the hash function.\n"
" The allowable choices range between 1-%d, inclusive.\n"
@@ -166,59 +172,71 @@ Options::long_usage (FILE * strm)
" used, and key positions may occur in any order.\n"
" Also, the meta-character '*' causes the generated\n"
" hash function to consider ALL key positions, and $\n"
" indicates the ``final character'' of a key, e.g.,\n"
" $,1,2,4,6-10.\n"
" indicates the \"final character\" of a key, e.g.,\n"
" $,1,2,4,6-10.\n",
Positions::MAX_KEY_POS - 1);
fprintf (stream,
" -l, --compare-strlen Compare key lengths before trying a string\n"
" comparison. This helps cut down on the number of\n"
" string comparisons made during the lookup.\n"
" string comparisons made during the lookup.\n");
fprintf (stream,
" -D, --duplicates Handle keywords that hash to duplicate values. This\n"
" is useful for certain highly redundant keyword sets.\n"
" -f, --fast=ITERATIONS Generate the gen-perf.hash function ``fast''. This\n"
" is useful for certain highly redundant keyword sets.\n");
fprintf (stream,
" -f, --fast=ITERATIONS Generate the gen-perf.hash function \"fast\". This\n"
" decreases gperf's running time at the cost of\n"
" minimizing generated table size. The numeric\n"
" argument represents the number of times to iterate\n"
" when resolving a collision. `0' means ``iterate by\n"
" the number of keywords''.\n"
" when resolving a collision. '0' means \"iterate by\n"
" the number of keywords\".\n");
fprintf (stream,
" -i, --initial-asso=N Provide an initial value for the associate values\n"
" array. Default is 0. Setting this value larger helps\n"
" inflate the size of the final table.\n"
" -j, --jump=JUMP-VALUE Affects the ``jump value'', i.e., how far to advance\n"
" inflate the size of the final table.\n");
fprintf (stream,
" -j, --jump=JUMP-VALUE Affects the \"jump value\", i.e., how far to advance\n"
" the associated character value upon collisions. Must\n"
" be an odd number, default is %d.\n"
" be an odd number, default is %d.\n",
DEFAULT_JUMP_VALUE);
fprintf (stream,
" -n, --no-strlen Do not include the length of the keyword when\n"
" computing the hash function.\n"
" computing the hash function.\n");
fprintf (stream,
" -o, --occurrence-sort Reorders input keys by frequency of occurrence of\n"
" the key sets. This should decrease the search time\n"
" dramatically.\n"
" dramatically.\n");
fprintf (stream,
" -r, --random Utilizes randomness to initialize the associated\n"
" values table.\n"
" values table.\n");
fprintf (stream,
" -s, --size-multiple=N Affects the size of the generated hash table. The\n"
" numeric argument N indicates ``how many times larger\n"
" or smaller'' the associated value range should be,\n"
" numeric argument N indicates \"how many times larger\n"
" or smaller\" the associated value range should be,\n"
" in relationship to the number of keys, e.g. a value\n"
" of 3 means ``allow the maximum associated value to\n"
" of 3 means \"allow the maximum associated value to\n"
" be about 3 times larger than the number of input\n"
" keys.'' Conversely, a value of -3 means ``make the\n"
" keys\". Conversely, a value of -3 means \"make the\n"
" maximum associated value about 3 times smaller than\n"
" the number of input keys. A larger table should\n"
" the number of input keys\". A larger table should\n"
" decrease the time required for an unsuccessful\n"
" search, at the expense of extra table space. Default\n"
" value is 1.\n"
"\n"
" value is 1.\n");
fprintf (stream, "\n");
fprintf (stream,
"Informative output:\n"
" -h, --help Print this message.\n"
" -v, --version Print the gperf version number.\n"
" -d, --debug Enables the debugging option (produces verbose\n"
" output to the standard error).\n"
"\n"
"Report bugs to <bug-gnu-utils@gnu.org>.\n"
, program_name, MAX_KEY_POS - 1, DEFAULT_JUMP_VALUE);
" output to the standard error).\n");
fprintf (stream, "\n");
fprintf (stream,
"Report bugs to <bug-gnu-utils@gnu.org>.\n");
}
/* Output command-line Options. */
/* Prints the given options. */
void
Options::print_options ()
Options::print_options () const
{
int i;
@@ -228,7 +246,7 @@ Options::print_options ()
{
const char *arg = _argument_vector[i];
/* Escape arg if it contains shell metacharacters. */
/* Escape arg if it contains shell metacharacters. */
if (*arg == '-')
{
putchar (*arg);
@@ -278,110 +296,96 @@ Options::print_options ()
class PositionStringParser
{
public:
PositionStringParser (const char *s, int lo, int hi, int word_end, int bad_val, int key_end);
PositionStringParser (const char *str, int low_bound, int high_bound, int end_word_marker, int error_value, int end_marker);
int nextPosition ();
private:
const char *str; /* A pointer to the string provided by the user. */
int end; /* Value returned after last key is processed. */
int end_word; /* A value marking the abstract ``end of word'' ( usually '$'). */
int error_value; /* Error value returned when input is syntactically erroneous. */
int hi_bound; /* Greatest possible value, inclusive. */
int lo_bound; /* Smallest possible value, inclusive. */
int size;
int curr_value;
int upper_bound;
/* A pointer to the string provided by the user. */
const char * _str;
/* Smallest possible value, inclusive. */
int const _low_bound;
/* Greatest possible value, inclusive. */
int const _high_bound;
/* A value marking the abstract "end of word" ( usually '$'). */
int const _end_word_marker;
/* Error value returned when input is syntactically erroneous. */
int const _error_value;
/* Value returned after last key is processed. */
int const _end_marker;
int _size;
int _curr_value;
int _upper_bound;
};
PositionStringParser::PositionStringParser (const char *s, int lo, int hi, int word_end, int bad_val, int key_end)
: str (s), end (key_end), end_word (word_end), error_value (bad_val), hi_bound (hi), lo_bound (lo),
size (0), curr_value (0), upper_bound (0)
PositionStringParser::PositionStringParser (const char *str, int low_bound, int high_bound, int end_word_marker, int error_value, int end_marker)
: _str (str),
_low_bound (low_bound),
_high_bound (high_bound),
_end_word_marker (end_word_marker),
_error_value (error_value),
_end_marker (end_marker),
_size (0),
_curr_value (0),
_upper_bound (0)
{
}
int PositionStringParser::nextPosition ()
{
if (size)
if (_size)
{
if (++curr_value >= upper_bound)
size = 0;
return curr_value;
if (++_curr_value >= _upper_bound)
_size = 0;
return _curr_value;
}
else
{
while (*str)
switch (*str)
while (*_str)
switch (*_str)
{
default: return error_value;
case ',': str++; break;
case '$': str++; return end_word;
default: return _error_value;
case ',': _str++; break;
case '$': _str++; return _end_word_marker;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
for (curr_value = 0; isdigit ((unsigned char)(*str)); str++)
curr_value = curr_value * 10 + (*str - '0');
for (_curr_value = 0; isdigit ((unsigned char)(*_str)); _str++)
_curr_value = _curr_value * 10 + (*_str - '0');
if (*str == '-')
if (*_str == '-')
{
for (size = 1, upper_bound = 0;
isdigit ((unsigned char)(*++str));
upper_bound = upper_bound * 10 + (*str - '0'));
for (_size = 1, _upper_bound = 0;
isdigit ((unsigned char)(*++_str));
_upper_bound = _upper_bound * 10 + (*_str - '0'));
if (upper_bound <= curr_value || upper_bound > hi_bound)
return error_value;
if (_upper_bound <= _curr_value || _upper_bound > _high_bound)
return _error_value;
}
return curr_value >= lo_bound && curr_value <= hi_bound
? curr_value : error_value;
return _curr_value >= _low_bound && _curr_value <= _high_bound
? _curr_value : _error_value;
}
return end;
return _end_marker;
}
}
/* Sorts the LEN key positions stored in BASE into *descending* order.
   Later routines, code generation in particular, rely on that order.
   The algorithm is a plain insertion sort, which is cheap when the input
   is already nearly ordered.
   Returns 1 if all positions are distinct.  Returns 0 as soon as two equal
   positions are met; in that case BASE is left partially rearranged.  */
inline int
Options::key_sort (char *base, int len)
{
  for (int pos = 1; pos < len; pos++)
    {
      /* Element being inserted into the already sorted prefix [0, pos).  */
      int pending = base[pos];
      int slot = pos;

      while (slot > 0 && pending >= base[slot - 1])
        {
          /* Shift the left neighbour one place to the right...  */
          base[slot] = base[slot - 1];
          /* ...and bail out if it turns out to equal the pending element.  */
          if (base[slot] == pending)
            return 0;
          slot--;
        }

      base[slot] = pending;
    }

  return 1;
}
/* Sets the default Options. */
Options::Options ()
: _option_word (DEFAULTCHARS | C),
_iterations (0),
_jump (DEFAULT_JUMP_VALUE),
_initial_asso_value (0),
_total_switches (1),
_size_multiple (1),
_function_name (DEFAULT_NAME),
_key_name (DEFAULT_KEY),
_initializer_suffix (DEFAULT_INITIALIZER_SUFFIX),
_class_name (DEFAULT_CLASS_NAME),
_hash_name (DEFAULT_HASH_NAME),
_wordlist_name (DEFAULT_WORDLIST_NAME),
_delimiters (DEFAULT_DELIMITERS),
_key_positions (1, Positions::LASTCHAR)
{
_key_positions[0] = WORD_START;
_key_positions[1] = WORD_END;
_key_positions[2] = EOS;
_total_keysig_size = 2;
_delimiters = DEFAULT_DELIMITERS;
_jump = DEFAULT_JUMP_VALUE;
_option_word = DEFAULTCHARS | C;
_function_name = DEFAULT_NAME;
_key_name = DEFAULT_KEY;
_initializer_suffix = DEFAULT_INITIALIZER_SUFFIX;
_hash_name = DEFAULT_HASH_NAME;
_wordlist_name = DEFAULT_WORDLIST_NAME;
_class_name = DEFAULT_CLASS_NAME;
_size = 1;
_total_switches = 1;
_iterations = 0;
_initial_asso_value = 0;
}
/* Dumps option status when debug is set. */
@@ -390,8 +394,6 @@ Options::~Options ()
{
if (_option_word & DEBUG)
{
char *ptr;
fprintf (stderr, "\ndumping Options:"
"\nDEBUG is.......: %s"
"\nORDER is.......: %s"
@@ -421,7 +423,7 @@ Options::~Options ()
"\nkey name = %s"
"\ninitializer suffix = %s"
"\njump value = %d"
"\nmax associated value = %d"
"\nhash table size multiplier = %d"
"\ninitial associated value = %d"
"\ndelimiters = %s"
"\nnumber of switch statements = %d\n",
@@ -448,19 +450,22 @@ Options::~Options ()
_option_word & SEVENBIT ? "enabled" : "disabled",
_iterations,
_function_name, _hash_name, _wordlist_name, _key_name,
_initializer_suffix, _jump, _size - 1, _initial_asso_value,
_initializer_suffix, _jump, _size_multiple, _initial_asso_value,
_delimiters, _total_switches);
if (_option_word & ALLCHARS)
fprintf (stderr, "all characters are used in the hash function\n");
else
{
fprintf (stderr, "maximum keysig size = %d\nkey positions are: \n",
_key_positions.get_size());
fprintf (stderr, "maximum keysig size = %d\nkey positions are: \n",
_total_keysig_size);
for (ptr = _key_positions; *ptr != EOS; ptr++)
if (*ptr == WORD_END)
fprintf (stderr, "$\n");
else
fprintf (stderr, "%d\n", *ptr);
PositionIterator iter (_key_positions);
for (int pos; (pos = iter.next()) != PositionIterator::EOS; )
if (pos == Positions::LASTCHAR)
fprintf (stderr, "$\n");
else
fprintf (stderr, "%d\n", pos);
}
fprintf (stderr, "finished dumping Options\n");
}
@@ -505,7 +510,7 @@ static const struct option long_options[] =
};
void
Options::operator() (int argc, char *argv[])
Options::parse_options (int argc, char *argv[])
{
int option_char;
@@ -615,43 +620,52 @@ Options::operator() (int argc, char *argv[])
case 'k': /* Sets key positions used for hash function. */
{
const int BAD_VALUE = -1;
const int EOS = PositionIterator::EOS;
int value;
PositionStringParser sparser (/*getopt*/optarg, 1, MAX_KEY_POS - 1, WORD_END, BAD_VALUE, EOS);
PositionStringParser sparser (/*getopt*/optarg, 1, Positions::MAX_KEY_POS - 1, Positions::LASTCHAR, BAD_VALUE, EOS);
if (/*getopt*/optarg [0] == '*') /* Use all the characters for hashing!!!! */
_option_word = (_option_word & ~DEFAULTCHARS) | ALLCHARS;
else
{
char *key_pos;
unsigned char *key_positions = _key_positions.pointer();
unsigned char *key_pos;
for (key_pos = _key_positions; (value = sparser.nextPosition()) != EOS; key_pos++)
for (key_pos = key_positions; (value = sparser.nextPosition()) != EOS; key_pos++)
if (value == BAD_VALUE)
{
fprintf (stderr, "Illegal key value or range, use 1,2,3-%d,'$' or '*'.\n",
MAX_KEY_POS - 1);
Positions::MAX_KEY_POS - 1);
short_usage (stderr);
exit (1);
}
else
*key_pos = value;;
*key_pos = value;
*key_pos = EOS;
if (! (_total_keysig_size = (key_pos - _key_positions)))
unsigned int total_keysig_size = key_pos - key_positions;
if (total_keysig_size == 0)
{
fprintf (stderr, "No keys selected.\n");
short_usage (stderr);
exit (1);
}
else if (! key_sort (_key_positions, _total_keysig_size))
_key_positions.set_size (total_keysig_size);
/* Sorts the key positions *IN REVERSE ORDER!!*
This makes further routines more efficient. Especially
when generating code. */
if (! _key_positions.sort())
{
fprintf (stderr, "Duplicate keys selected\n");
short_usage (stderr);
exit (1);
}
if (_total_keysig_size != 2
|| (_key_positions[0] != 1 || _key_positions[1] != WORD_END))
if (!(_key_positions.get_size() == 2
&& _key_positions[0] == 1
&& _key_positions[1] == Positions::LASTCHAR))
_option_word &= ~DEFAULTCHARS;
}
break;
@@ -710,8 +724,8 @@ Options::operator() (int argc, char *argv[])
}
case 's': /* Range of associated values, determines size of final table. */
{
if (abs (_size = atoi (/*getopt*/optarg)) > 50)
fprintf (stderr, "%d is excessive, did you really mean this?! (try `%s --help' for help)\n", _size, program_name);
if (abs (_size_multiple = atoi (/*getopt*/optarg)) > 50)
fprintf (stderr, "%d is excessive, did you really mean this?! (try `%s --help' for help)\n", _size_multiple, program_name);
break;
}
case 'S': /* Generate switch statement output, rather than lookup table. */

View File

@@ -22,56 +22,139 @@ along with GNU GPERF; see the file COPYING. If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
/* This module provides a uniform interface to the various options available
to a user of the gperf hash function generator. In addition to the
run-time options, found in the Option_Type below, there is also the
hash table Size and the Keys to be used in the hashing.
The overall design of this module was an experiment in using C++
classes as a mechanism to enhance centralization of option and
and error handling, which tend to get out of hand in a C program. */
to a user of the gperf hash function generator. */
#ifndef options_h
#define options_h 1
#include <stdio.h>
/* Enumerate the potential debugging Options. */
/* Enumeration of the possible boolean options. */
enum Option_Type
{
DEBUG = 01, /* Enable debugging (prints diagnostics to stderr). */
ORDER = 02, /* Apply ordering heuristic to speed-up search time. */
ALLCHARS = 04, /* Use all characters in hash function. */
TYPE = 010, /* Handle user-defined type structured keyword input. */
RANDOM = 020, /* Randomly initialize the associated values table. */
DEFAULTCHARS = 040, /* Make default char positions be 1,$ (end of keyword). */
SWITCH = 0100, /* Generate switch output to save space. */
NOLENGTH = 0200, /* Don't include keyword length in hash computations. */
LENTABLE = 0400, /* Generate a length table for string comparison. */
DUP = 01000, /* Handle duplicate hash values for keywords. */
FAST = 02000, /* Generate the hash function ``fast.'' */
NOTYPE = 04000, /* Don't include user-defined type definition in output -- it's already defined elsewhere. */
COMP = 010000, /* Generate strncmp rather than strcmp. */
GLOBAL = 020000, /* Make the keyword table a global variable. */
CONST = 040000, /* Make the generated tables readonly (const). */
KRC = 0100000, /* Generate K&R C code: no prototypes, no const. */
C = 0200000, /* Generate C code: no prototypes, but const (user can #define it away). */
ANSIC = 0400000, /* Generate ISO/ANSI C code: prototypes and const, but no class. */
CPLUSPLUS = 01000000, /* Generate C++ code: prototypes, const, class, inline, enum. */
ENUM = 02000000, /* Use enum for constants. */
INCLUDE = 04000000, /* Generate #include statements. */
SEVENBIT = 010000000 /* Assume 7-bit, not 8-bit, characters. */
/* Enable debugging (prints diagnostics to stderr). */
DEBUG = 1 << 0,
/* Apply ordering heuristic to speed-up search time. */
ORDER = 1 << 1,
/* Use all characters in hash function. */
ALLCHARS = 1 << 2,
/* Handle user-defined type structured keyword input. */
TYPE = 1 << 3,
/* Randomly initialize the associated values table. */
RANDOM = 1 << 4,
/* Make default char positions be 1,$ (end of keyword). */
DEFAULTCHARS = 1 << 5,
/* Generate switch output to save space. */
SWITCH = 1 << 6,
/* Don't include keyword length in hash computations. */
NOLENGTH = 1 << 7,
/* Generate a length table for string comparison. */
LENTABLE = 1 << 8,
/* Handle duplicate hash values for keywords. */
DUP = 1 << 9,
/* Generate the hash function "fast". */
FAST = 1 << 10,
/* Don't include user-defined type definition in output -- it's already
defined elsewhere. */
NOTYPE = 1 << 11,
/* Generate strncmp rather than strcmp. */
COMP = 1 << 12,
/* Make the keyword table a global variable. */
GLOBAL = 1 << 13,
/* Make the generated tables readonly (const). */
CONST = 1 << 14,
/* Generate K&R C code: no prototypes, no const. */
KRC = 1 << 15,
/* Generate C code: no prototypes, but const (user can #define it away). */
C = 1 << 16,
/* Generate ISO/ANSI C code: prototypes and const, but no class. */
ANSIC = 1 << 17,
/* Generate C++ code: prototypes, const, class, inline, enum. */
CPLUSPLUS = 1 << 18,
/* Use enum for constants. */
ENUM = 1 << 19,
/* Generate #include statements. */
INCLUDE = 1 << 20,
/* Assume 7-bit, not 8-bit, characters. */
SEVENBIT = 1 << 21
};
/* Define some useful constants (these don't really belong here, but I'm
not sure where else to put them!). These should be consts, but g++
doesn't seem to do the right thing with them at the moment... ;-( */
/* This class denotes a set of key positions. */
enum
class Positions
{
MAX_KEY_POS = 128 - 1, /* Max size of each word's key set. */
WORD_START = 1, /* Signals the start of a word. */
WORD_END = 0, /* Signals the end of a word. */
EOS = MAX_KEY_POS /* Signals end of the key list. */
friend class PositionIterator;
public:
/* Denotes the last char of a keyword, depending on the keyword's length. */
static const int LASTCHAR = 0;
/* Maximum size of the set. */
static const int MAX_KEY_POS = 127;
/* Constructors. */
Positions ();
Positions (int key1);
Positions (int key1, int key2);
/* Accessors. */
int operator[] (unsigned int index) const;
unsigned int get_size () const;
/* Write access. */
unsigned char * pointer ();
void set_size (unsigned int size);
/* Sorts the array in reverse order.
Returns 1 if there are no duplicates, 0 otherwise. */
int sort ();
private:
/* Number of positions, excluding the terminating PositionIterator::EOS. */
unsigned int _size;
/* Array of positions. 1 for the first char, 2 for the second char etc.,
LASTCHAR for the last char. PositionIterator::EOS past the end. */
unsigned char _positions[MAX_KEY_POS];
};
/* This class denotes an iterator through a set of key positions.
   Usage pattern:
     PositionIterator iter (positions);
     for (int pos; (pos = iter.next ()) != PositionIterator::EOS; )
       ...
   The iterator stores a reference to the Positions object it was built
   from, so that object has to stay alive while the iterator is in use. */
class PositionIterator
{
public:
/* Initializes an iterator through POSITIONS. */
PositionIterator (Positions const& positions);
/* End of iteration marker.  Has the same value as Positions::MAX_KEY_POS. */
static const int EOS = Positions::MAX_KEY_POS;
/* Retrieves the next position, or EOS past the end. */
int next ();
private:
/* The set being iterated over; held by reference, not copied. */
const Positions& _set;
/* NOTE(review): presumably the index into _set of the element that the
   next call to next () will return -- the definition of next () is not
   visible here, confirm. */
int _index;
};
/* Class manager for gperf program Options. */
@@ -79,54 +162,122 @@ enum
class Options
{
public:
Options ();
~Options ();
int operator[] (Option_Type option);
void operator() (int argc, char *argv[]);
static void print_options ();
static void set_asso_max (int r);
static int get_asso_max ();
static void reset ();
static int get ();
static int get_iterations ();
static int get_max_keysig_size ();
static void set_keysig_size (int);
static int get_jump ();
static int initial_value ();
static int get_total_switches ();
static const char *get_function_name ();
static const char *get_key_name ();
static const char *get_initializer_suffix ();
static const char *get_class_name ();
static const char *get_hash_name ();
static const char *get_wordlist_name ();
static const char *get_delimiter ();
/* Constructor. */
Options ();
/* Destructor. */
~Options ();
/* Parses the options given in the command-line arguments. */
void parse_options (int argc, char *argv[]);
/* Prints the given options. */
void print_options () const;
/* Accessors. */
/* Tests a given boolean option. Returns non-zero (the option's bit) if set, 0 otherwise. */
int operator[] (Option_Type option) const;
/* Returns the iterations value. */
int get_iterations () const;
/* Returns the jump value. */
int get_jump () const;
/* Returns the initial associated character value. */
int get_initial_asso_value () const;
/* Returns the total number of switch statements to generate. */
int get_total_switches () const;
/* Returns the factor by which to multiply the generated table's size. */
int get_size_multiple () const;
/* Returns the generated function name. */
const char * get_function_name () const;
/* Returns the keyword key name. */
const char * get_key_name () const;
/* Returns the struct initializer suffix. */
const char * get_initializer_suffix () const;
/* Returns the generated class name. */
const char * get_class_name () const;
/* Returns the hash function name. */
const char * get_hash_name () const;
/* Returns the hash table array name. */
const char * get_wordlist_name () const;
/* Returns the string used to delimit keywords from other attributes. */
const char * get_delimiter () const;
/* Returns key positions. */
const Positions& get_key_positions () const;
/* Returns total distinct key positions. */
int get_max_keysig_size () const;
private:
static int _option_word; /* Holds the user-specified Options. */
static int _total_switches; /* Number of switch statements to generate. */
static int _total_keysig_size; /* Total number of distinct key_positions. */
static int _size; /* Range of the hash table. */
static int _key_pos; /* Tracks current key position for Iterator. */
static int _jump; /* Jump length when trying alternative values. */
static int _initial_asso_value; /* Initial value for asso_values table. */
static int _argument_count; /* Records count of command-line arguments. */
static int _iterations; /* Amount to iterate when a collision occurs. */
static char **_argument_vector; /* Stores a pointer to command-line vector. */
static const char *_function_name; /* Names used for generated lookup function. */
static const char *_key_name; /* Name used for keyword key. */
static const char *_initializer_suffix; /* Suffix for empty struct initializers. */
static const char *_class_name; /* Name used for generated C++ class. */
static const char *_hash_name; /* Name used for generated hash function. */
static const char *_wordlist_name; /* Name used for hash table array. */
static const char *_delimiters; /* Separates keywords from other attributes. */
static char _key_positions[MAX_KEY_POS]; /* Contains user-specified key choices. */
static int key_sort (char *base, int len); /* Sorts key positions in REVERSE order. */
static void short_usage (FILE * strm); /* Prints proper program usage. */
static void long_usage (FILE * strm); /* Prints proper program usage. */
/* Prints program usage to given stream. */
void short_usage (FILE * stream) const;
/* Prints program usage to given stream. */
void long_usage (FILE * stream) const;
/* Records count of command-line arguments. */
int _argument_count;
/* Stores a pointer to command-line argument vector. */
char **_argument_vector;
/* Holds the boolean options. */
int _option_word;
/* Amount to iterate when a collision occurs. */
int _iterations;
/* Jump length when trying alternative values. */
int _jump;
/* Initial value for asso_values table. */
int _initial_asso_value;
/* Number of switch statements to generate. */
int _total_switches;
/* Factor by which to multiply the generated table's size. */
int _size_multiple;
/* Names used for generated lookup function. */
const char *_function_name;
/* Name used for keyword key. */
const char *_key_name;
/* Suffix for empty struct initializers. */
const char *_initializer_suffix;
/* Name used for generated C++ class. */
const char *_class_name;
/* Name used for generated hash function. */
const char *_hash_name;
/* Name used for hash table array. */
const char *_wordlist_name;
/* Separates keywords from other attributes. */
const char *_delimiters;
/* Contains user-specified key choices. */
Positions _key_positions;
};
/* Global option coordinator for the entire program. */
/* Global option coordinator for the entire program. */
extern Options option;
#ifdef __OPTIMIZE__

View File

@@ -19,128 +19,193 @@ You should have received a copy of the GNU General Public License
along with GNU GPERF; see the file COPYING. If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
/* TRUE if option enable, else FALSE. */
/* Constructs an empty set: no positions, only the end marker.  */
INLINE
Positions::Positions ()
  : _size (0)
{
  /* The end marker sits directly behind the last stored position,
     i.e. at index _size.  */
  _positions[_size] = PositionIterator::EOS;
}
/* Constructs a set holding the single position KEY1.  */
INLINE
Positions::Positions (int key1)
  : _size (1)
{
  /* Fill the array front to back: the position, then the end marker.  */
  unsigned char *slot = _positions;
  *slot++ = key1;
  *slot = PositionIterator::EOS;
}
/* Constructs a set holding the two positions KEY1 and KEY2, in that
   order.  */
INLINE
Positions::Positions (int key1, int key2)
  : _size (2)
{
  /* Fill the array front to back: both positions, then the end marker.  */
  unsigned char *slot = _positions;
  *slot++ = key1;
  *slot++ = key2;
  *slot = PositionIterator::EOS;
}
INLINE int
Options::operator[] (Option_Type option)
Positions::operator[] (unsigned int index) const
{
return _option_word & option;
return _positions[index];
}
/* Initializes the key Iterator. */
INLINE void
Options::reset ()
{
_key_pos = 0;
}
/* Returns current key_position and advance index. */
INLINE int
Options::get ()
{
return _key_positions[_key_pos++];
}
/* Sets the size of the table size. */
INLINE void
Options::set_asso_max (int r)
{
_size = r;
}
/* Returns the size of the table size. */
INLINE int
Options::get_asso_max ()
INLINE unsigned int
Positions::get_size () const
{
return _size;
}
/* Returns total distinct key positions. */
INLINE int
Options::get_max_keysig_size ()
INLINE unsigned char *
Positions::pointer ()
{
return _total_keysig_size;
return _positions;
}
/* Sets total distinct key positions. */
INLINE void
Options::set_keysig_size (int size)
Positions::set_size (unsigned int size)
{
_total_keysig_size = size;
_size = size;
}
/* Returns the jump value. */
/* Sorts the array in reverse order.
Returns 1 if there are no duplicates, 0 otherwise. */
INLINE int
Options::get_jump ()
Positions::sort ()
{
return _jump;
/* Insertion sort, producing descending order. */
int duplicate_free = 1;
unsigned char *base = _positions;
unsigned int len = _size;
for (unsigned int i = 1; i < len; i++)
{
unsigned int j;
int tmp;
for (j = i, tmp = base[j]; j > 0 && tmp >= base[j - 1]; j--)
if ((base[j] = base[j - 1]) == tmp) /* oh no, a duplicate!!! */
duplicate_free = 0;
base[j] = tmp;
}
return duplicate_free;
}
/* Returns the generated function name. */
INLINE const char *
Options::get_function_name ()
INLINE
PositionIterator::PositionIterator (Positions const& positions)
: _set (positions),
_index (0)
{
return _function_name;
}
/* Returns the keyword key name. */
INLINE const char *
Options::get_key_name ()
{
return _key_name;
}
/* Returns the struct initializer suffix. */
INLINE const char *
Options::get_initializer_suffix ()
{
return _initializer_suffix;
}
/* Returns the hash function name. */
INLINE const char *
Options::get_hash_name ()
{
return _hash_name;
}
/* Returns the hash table array name. */
INLINE const char *
Options::get_wordlist_name ()
{
return _wordlist_name;
}
/* Returns the generated class name. */
INLINE const char *
Options::get_class_name ()
{
return _class_name;
}
/* Returns the initial associated character value. */
INLINE int
Options::initial_value ()
PositionIterator::next ()
{
return _initial_asso_value;
return _set._positions[_index++];
}
/* Returns the iterations value. */
/* Tests a given boolean option. Returns non-zero (the option's bit) if set, 0 otherwise. */
INLINE int
Options::get_iterations ()
Options::operator[] (Option_Type option) const
{
return _option_word & option;
}
/* Returns the iterations value, i.e. the _iterations member: the amount
   to iterate when a collision occurs. */
INLINE int
Options::get_iterations () const
{
return _iterations;
}
/* Returns the string used to delimit keywords from other attributes. */
/* Returns the jump value, i.e. the _jump member: the jump length used
   when trying alternative values. */
INLINE int
Options::get_jump () const
{
return _jump;
}
/* Returns the initial associated character value, i.e. the
   _initial_asso_value member: the initial value for the asso_values
   table. */
INLINE int
Options::get_initial_asso_value () const
{
return _initial_asso_value;
}
/* Returns the total number of switch statements to generate
   (the _total_switches member). */
INLINE int
Options::get_total_switches () const
{
return _total_switches;
}
/* Returns the factor by which to multiply the generated table's size
   (the _size_multiple member). */
INLINE int
Options::get_size_multiple () const
{
return _size_multiple;
}
/* Returns the generated function name. */
INLINE const char *
Options::get_delimiter ()
Options::get_function_name () const
{
return _function_name;
}
/* Returns the keyword key name (the _key_name member).
   The stored pointer is returned as is; no copy is made. */
INLINE const char *
Options::get_key_name () const
{
return _key_name;
}
/* Returns the struct initializer suffix (the _initializer_suffix member),
   i.e. the suffix for empty struct initializers.
   The stored pointer is returned as is; no copy is made. */
INLINE const char *
Options::get_initializer_suffix () const
{
return _initializer_suffix;
}
/* Returns the name used for the generated C++ class (the _class_name
   member).
   The stored pointer is returned as is; no copy is made. */
INLINE const char *
Options::get_class_name () const
{
return _class_name;
}
/* Returns the name used for the generated hash function (the _hash_name
   member).
   The stored pointer is returned as is; no copy is made. */
INLINE const char *
Options::get_hash_name () const
{
return _hash_name;
}
/* Returns the name used for the hash table array (the _wordlist_name
   member).
   The stored pointer is returned as is; no copy is made. */
INLINE const char *
Options::get_wordlist_name () const
{
return _wordlist_name;
}
/* Returns the string used to delimit keywords from other attributes
   (the _delimiters member).
   The stored pointer is returned as is; no copy is made. */
INLINE const char *
Options::get_delimiter () const
{
return _delimiters;
}
/* Gets the total number of switch statements to generate. */
INLINE int
Options::get_total_switches ()
/* Returns key positions. */
INLINE const Positions&
Options::get_key_positions () const
{
return _total_switches;
return _key_positions;
}
/* Returns total distinct key positions: the size reported by the
   _key_positions set. Positions::get_size () yields an unsigned int,
   which is implicitly converted to the int return type here. */
INLINE int
Options::get_max_keysig_size () const
{
return _key_positions.get_size();
}

View File

@@ -470,16 +470,34 @@ Output::output_hash_function ()
printf (" return %sasso_values[%sstr[len - 1]] + asso_values[%sstr[0]];\n",
option[NOLENGTH] ? "" : "len + ",
char_to_index, char_to_index);
else if (option[ALLCHARS])
{
/* User wants *all* characters considered in hash. */
printf (" register int hval = %s;\n\n"
" switch (%s)\n"
" {\n"
" default:\n",
option[NOLENGTH] ? "0" : "len",
option[NOLENGTH] ? "len" : "hval");
for (int i = _max_key_len; i > 0; i--)
printf (" case %d:\n"
" hval += asso_values[%sstr[%d]];\n",
i, char_to_index, i - 1);
printf (" break;\n"
" }\n"
" return hval;\n");
}
else
{
PositionIterator iter (option.get_key_positions());
int key_pos;
option.reset ();
/* Get first (also highest) key position. */
key_pos = option.get ();
key_pos = iter.next ();
if (!option[ALLCHARS] && (key_pos == WORD_END || key_pos <= _min_key_len))
if (key_pos == Positions::LASTCHAR || key_pos <= _min_key_len)
{
/* We can perform additional optimizations here:
Write it out as a single expression. Note that the values
@@ -489,16 +507,16 @@ Output::output_hash_function ()
printf (" return %s",
option[NOLENGTH] ? "" : "len + ");
for (; key_pos != WORD_END; )
for (; key_pos != Positions::LASTCHAR; )
{
printf ("asso_values[%sstr[%d]]", char_to_index, key_pos - 1);
if ((key_pos = option.get ()) != EOS)
if ((key_pos = iter.next ()) != PositionIterator::EOS)
printf (" + ");
else
break;
}
if (key_pos == WORD_END)
if (key_pos == Positions::LASTCHAR)
printf ("asso_values[%sstr[len - 1]]", char_to_index);
printf (";\n");
@@ -513,50 +531,35 @@ Output::output_hash_function ()
option[NOLENGTH] ? "0" : "len",
option[NOLENGTH] ? "len" : "hval");
/* User wants *all* characters considered in hash. */
if (option[ALLCHARS])
{
for (int i = _max_key_len; i > 0; i--)
printf (" case %d:\n"
" hval += asso_values[%sstr[%d]];\n",
i, char_to_index, i - 1);
while (key_pos != Positions::LASTCHAR && key_pos > _max_key_len)
if ((key_pos = iter.next ()) == PositionIterator::EOS)
break;
printf (" break;\n"
" }\n"
" return hval;\n");
}
else /* do the hard part... */
if (key_pos != PositionIterator::EOS && key_pos != Positions::LASTCHAR)
{
while (key_pos != WORD_END && key_pos > _max_key_len)
if ((key_pos = option.get ()) == EOS)
break;
if (key_pos != EOS && key_pos != WORD_END)
int i = key_pos;
do
{
int i = key_pos;
do
{
for ( ; i >= key_pos; i--)
printf (" case %d:\n", i);
printf (" hval += asso_values[%sstr[%d]];\n",
char_to_index, key_pos - 1);
key_pos = option.get ();
}
while (key_pos != EOS && key_pos != WORD_END);
for ( ; i >= _min_key_len; i--)
for ( ; i >= key_pos; i--)
printf (" case %d:\n", i);
}
printf (" break;\n"
" }\n"
" return hval");
if (key_pos == WORD_END)
printf (" + asso_values[%sstr[len - 1]]", char_to_index);
printf (";\n");
printf (" hval += asso_values[%sstr[%d]];\n",
char_to_index, key_pos - 1);
key_pos = iter.next ();
}
while (key_pos != PositionIterator::EOS && key_pos != Positions::LASTCHAR);
for ( ; i >= _min_key_len; i--)
printf (" case %d:\n", i);
}
printf (" break;\n"
" }\n"
" return hval");
if (key_pos == Positions::LASTCHAR)
printf (" + asso_values[%sstr[len - 1]]", char_to_index);
printf (";\n");
}
}
printf ("}\n\n");
@@ -1432,7 +1435,7 @@ Output::output ()
else if (option[CPLUSPLUS])
printf ("C++");
printf (" code produced by gperf version %s */\n", version_string);
Options::print_options ();
option.print_options ();
printf ("%s\n", _include_src);