1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 21:19:24 +00:00

When the option -k is not given, the default key positions are now computed

depending on the set of keywords.
This commit is contained in:
Bruno Haible
2003-02-11 11:09:27 +00:00
parent 6202aaadb1
commit 810fef43ae
18 changed files with 583 additions and 123 deletions

View File

@@ -1,3 +1,55 @@
2002-11-17 Bruno Haible <bruno@clisp.org>
* src/options.h (POSITIONS): New enum value.
(Positions::Positions): New copy constructor.
(Positions::operator=, Positions::contains, Position::add,
Positions::remove, Positions::print): New method declaration.
(Options::get_max_keysig_size): Remove method.
* src/options.icc (Positions::Positions): New copy constructor.
(Positions::operator=): New method.
(Options::get_max_keysig_size): Remove method.
* src/options.cc (Options::Options): Initialize _key_positions
trivially.
(Options::parse_options): Option -k sets POSITIONS.
(Positions::contains, Positions::add, Positions::remove,
Positions::print): New methods.
* src/hash-table.cc (Hash_Table::~Hash_Table): Compute the field
width explicitly, instead of using Options::get_max_keysig_size.
* src/keyword.h (KeywordExt::init_selchars): Add arguments
use_all_chars, positions.
(KeywordExt::delete_selchars): New declaration.
* src/keyword.cc (KeywordExt::init_selchars): Add arguments
use_all_chars, positions. Remove error message if there are no key
positions.
(KeywordExt::delete_selchars): New method.
* src/search.h: Include options.h.
(Search::preprepare, Search::init_selchars, Search::delete_selchars,
Search::count_duplicates, Search::find_positions): New declarations.
(Search::_key_positions): New field.
* src/search.cc (Search::Search): Initialize _key_positions.
(Search::preprepare, Search::init_selchars, Search::delete_selchars,
Search::count_duplicates, Search::find_positions): New functions.
(Search::prepare): Call preprepare and find_positions. Tweak error
message.
(Search::get_max_keysig_size): Use _key_positions instead of
option.get_key_positions().
(Search::optimize): Tweak error message.
* src/output.h: Include options.h.
(Output::Output): Add Positions argument.
(Output::_key_positions): New field.
* src/output.cc (Output::Output): Add Positions argument.
(Output::output_hash_function): Omit the table if there are no
positions at all. Use _key_positions instead of
option.get_key_positions().
(Output::output): Output the computed positions as a comment.
* src/main.cc (main): Pass the Positions from Searcher to Output.
* src/Makefile.in (SEARCH_H, OUTPUT_H): Include OPTIONS_H.
* tests/Makefile.in (check-test): Pass key positions explicitly.
* tests/gpc.exp: Update.
* tests/test-4.exp: Update.
* doc/gperf.texi (Algorithmic Details): Mention that -k is not needed
usually.
2002-11-16 Bruno Haible <bruno@clisp.org>
* src/options.h (Options::get_slot_name): Renamed from

2
NEWS
View File

@@ -27,6 +27,8 @@ New in 2.8:
%define word-array-name NAME
%switch=COUNT
%omit-struct-type
* When the option -k is not given, the default key positions are now
computed depending on the set of keywords.
* If the input file is given by name, the output file will now contain
#line directives referring to the input file.
* Bug fixes.

View File

@@ -985,6 +985,10 @@ with length less than the indicated byte positions work properly, since
selected byte positions exceeding the keyword length are simply not
referenced in the hash function.
This option is not normally needed since version 2.8 of @code{gperf};
the default byte positions are computed depending on the keyword set,
through a search that minimizes the number of byte positions.
@item -D
@itemx --duplicates
@cindex Duplicates

View File

@@ -91,8 +91,8 @@ KEYWORD_LIST_H = keyword-list.h keyword-list.icc $(KEYWORD_H)
INPUT_H = input.h $(KEYWORD_LIST_H)
BOOL_ARRAY_H = bool-array.h bool-array.icc $(OPTIONS_H)
HASH_TABLE_H = hash-table.h $(KEYWORD_H)
SEARCH_H = search.h $(KEYWORD_LIST_H) $(BOOL_ARRAY_H)
OUTPUT_H = output.h $(KEYWORD_LIST_H)
SEARCH_H = search.h $(KEYWORD_LIST_H) $(OPTIONS_H) $(BOOL_ARRAY_H)
OUTPUT_H = output.h $(KEYWORD_LIST_H) $(OPTIONS_H)
version.o : version.cc $(VERSION_H)
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/version.cc

View File

@@ -89,16 +89,11 @@ Hash_Table::~Hash_Table ()
{
int field_width;
if (option[ALLCHARS])
{
field_width = 0;
for (int i = _size - 1; i >= 0; i--)
if (_table[i])
if (field_width < _table[i]->_selchars_length)
field_width = _table[i]->_selchars_length;
}
else
field_width = option.get_max_keysig_size ();
field_width = 0;
for (int i = _size - 1; i >= 0; i--)
if (_table[i])
if (field_width < _table[i]->_selchars_length)
field_width = _table[i]->_selchars_length;
fprintf (stderr,
"\ndumping the hash table\n"

View File

@@ -47,7 +47,7 @@ static inline void sort_char_set (unsigned char *base, int len)
}
}
/* Initialize selchars and selchars_length.
/* Initializes selchars and selchars_length.
The hash function will be computed as
asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ...
We compute selchars as the multiset
@@ -57,14 +57,15 @@ static inline void sort_char_set (unsigned char *base, int len)
Furthermore we sort the selchars array, to ease detection of duplicates
later.
*/
void KeywordExt::init_selchars ()
void
KeywordExt::init_selchars (bool use_all_chars, const Positions& positions)
{
const char *k = _allchars;
unsigned char *key_set =
new unsigned char[(option[ALLCHARS] ? _allchars_length : option.get_max_keysig_size ())];
new unsigned char[(use_all_chars ? _allchars_length : positions.get_size ())];
unsigned char *ptr = key_set;
if (option[ALLCHARS])
if (use_all_chars)
/* Use all the character positions in the KEY. */
for (int i = _allchars_length; i > 0; k++, i--)
{
@@ -76,7 +77,7 @@ void KeywordExt::init_selchars ()
{
/* Iterate through the list of key_positions, initializing selchars
(via ptr). */
PositionIterator iter (option.get_key_positions ());
PositionIterator iter (positions);
for (int i; (i = iter.next ()) != PositionIterator::EOS; )
{
@@ -91,15 +92,6 @@ void KeywordExt::init_selchars ()
continue;
ptr++;
}
/* Didn't get any hits and user doesn't want to consider the
keylength, so there are essentially no usable hash positions! */
if (ptr == key_set && option[NOLENGTH])
{
fprintf (stderr, "Can't hash keyword %.*s with chosen key positions.\n",
_allchars_length, _allchars);
exit (1);
}
}
/* Sort the KEY_SET items alphabetically. */
@@ -109,6 +101,13 @@ void KeywordExt::init_selchars ()
_selchars_length = ptr - key_set;
}
/* Deletes selchars. */
void
KeywordExt::delete_selchars ()
{
delete[] _selchars;
}
/* ------------------------- Keyword_Factory class ------------------------- */

View File

@@ -26,6 +26,9 @@
#ifndef keyword_h
#define keyword_h 1
/* Class defined in "options.h". */
class Positions;
/* An instance of this class is a keyword, as specified in the input file. */
struct Keyword
@@ -64,8 +67,10 @@ struct KeywordExt : public Keyword
KeywordExt * _duplicate_link;
/* Methods depending on the keyposition list. */
/* Initialize selchars and selchars_length. */
void init_selchars ();
/* Initializes selchars and selchars_length. */
void init_selchars (bool use_all_chars, const Positions& positions);
/* Deletes selchars. */
void delete_selchars ();
/* Data members used by the algorithm. */
int _occurrence; /* Frequency of key set occurrences. */

View File

@@ -103,9 +103,10 @@ main (int argc, char *argv[])
inputter._verbatim_code_end,
inputter._verbatim_code_lineno,
searcher._total_keys,
searcher._total_duplicates,
searcher._max_key_len,
searcher._min_key_len,
searcher._key_positions,
searcher._total_duplicates,
searcher._alpha_size,
searcher._occurrences,
searcher._asso_values);

View File

@@ -442,7 +442,7 @@ Options::Options ()
_hash_name (DEFAULT_HASH_NAME),
_wordlist_name (DEFAULT_WORDLIST_NAME),
_delimiters (DEFAULT_DELIMITERS),
_key_positions (1, Positions::LASTCHAR)
_key_positions ()
{
}
@@ -766,6 +766,7 @@ Options::parse_options (int argc, char *argv[])
}
case 'k': /* Sets key positions used for hash function. */
{
_option_word |= POSITIONS;
const int BAD_VALUE = -2;
const int EOS = PositionIterator::EOS;
int value;
@@ -782,7 +783,7 @@ Options::parse_options (int argc, char *argv[])
{
if (value == BAD_VALUE)
{
fprintf (stderr, "Invalid key value or range, use 1,2,3-%d,'$' or '*'.\n",
fprintf (stderr, "Invalid position value or range, use 1,2,3-%d,'$' or '*'.\n",
Positions::MAX_KEY_POS);
short_usage (stderr);
exit (1);
@@ -793,7 +794,7 @@ Options::parse_options (int argc, char *argv[])
Since all key positions are in the range
1..Positions::MAX_KEY_POS or == Positions::LASTCHAR,
there must be duplicates. */
fprintf (stderr, "Duplicate keys selected\n");
fprintf (stderr, "Duplicate key positions selected\n");
short_usage (stderr);
exit (1);
}
@@ -803,7 +804,7 @@ Options::parse_options (int argc, char *argv[])
unsigned int total_keysig_size = key_pos - key_positions;
if (total_keysig_size == 0)
{
fprintf (stderr, "No keys selected.\n");
fprintf (stderr, "No key positions selected.\n");
short_usage (stderr);
exit (1);
}
@@ -814,7 +815,7 @@ Options::parse_options (int argc, char *argv[])
when generating code. */
if (! _key_positions.sort())
{
fprintf (stderr, "Duplicate keys selected\n");
fprintf (stderr, "Duplicate key positions selected\n");
short_usage (stderr);
exit (1);
}
@@ -939,6 +940,137 @@ Options::parse_options (int argc, char *argv[])
}
}
/* ---------------------------- Class Positions ---------------------------- */
/* Set operations. Assumes the array is in reverse order. */
bool
Positions::contains (int pos) const
{
unsigned int count = _size;
const unsigned char *p = _positions + _size - 1;
for (; count > 0; p--, count--)
{
if (*p == pos)
return true;
if (*p > pos)
break;
}
return false;
}
void
Positions::add (int pos)
{
unsigned int count = _size;
if (count == MAX_KEY_POS + 1)
{
fprintf (stderr, "Positions::add internal error: overflow\n");
exit (1);
}
unsigned char *p = _positions + _size - 1;
for (; count > 0; p--, count--)
{
if (*p == pos)
{
fprintf (stderr, "Positions::add internal error: duplicate\n");
exit (1);
}
if (*p > pos)
break;
p[1] = p[0];
}
p[1] = pos;
_size++;
}
void
Positions::remove (int pos)
{
unsigned int count = _size;
if (count > 0)
{
unsigned char *p = _positions + _size - 1;
if (*p == pos)
{
_size--;
return;
}
if (*p < pos)
{
unsigned char prev = *p;
for (;;)
{
p--;
count--;
if (count == 0)
break;
if (*p == pos)
{
*p = prev;
_size--;
return;
}
if (*p > pos)
break;
unsigned char curr = *p;
*p = prev;
prev = curr;
}
}
}
fprintf (stderr, "Positions::remove internal error: not found\n");
exit (1);
}
/* Output in external syntax. */
void
Positions::print () const
{
bool first = true;
bool seen_LASTCHAR = false;
unsigned int count = _size;
const unsigned char *p = _positions + _size - 1;
for (; count > 0; p--, count--)
{
if (*p == LASTCHAR)
seen_LASTCHAR = true;
else
{
if (!first)
printf (",");
printf ("%d", *p);
if (count > 0 && p[-1] == *p + 1)
{
printf ("-");
do
{
p--;
count--;
}
while (count > 0 && p[-1] == *p + 1);
printf ("%d", *p);
}
first = false;
}
}
if (seen_LASTCHAR)
{
if (!first)
printf (",");
printf ("$");
}
}
/* ------------------------------------------------------------------------- */
#ifndef __OPTIMIZE__
#define INLINE /* not inline */

View File

@@ -41,63 +41,66 @@ enum Option_Type
/* Apply ordering heuristic to speed-up search time. */
ORDER = 1 << 1,
/* Use the given key positions. */
POSITIONS = 1 << 2,
/* Use all characters in hash function. */
ALLCHARS = 1 << 2,
ALLCHARS = 1 << 3,
/* Handle user-defined type structured keyword input. */
TYPE = 1 << 3,
TYPE = 1 << 4,
/* Randomly initialize the associated values table. */
RANDOM = 1 << 4,
RANDOM = 1 << 5,
/* Generate switch output to save space. */
SWITCH = 1 << 5,
SWITCH = 1 << 6,
/* Don't include keyword length in hash computations. */
NOLENGTH = 1 << 6,
NOLENGTH = 1 << 7,
/* Generate a length table for string comparison. */
LENTABLE = 1 << 7,
LENTABLE = 1 << 8,
/* Handle duplicate hash values for keywords. */
DUP = 1 << 8,
DUP = 1 << 9,
/* Generate the hash function "fast". */
FAST = 1 << 9,
FAST = 1 << 10,
/* Don't include user-defined type definition in output -- it's already
defined elsewhere. */
NOTYPE = 1 << 10,
NOTYPE = 1 << 11,
/* Generate strncmp rather than strcmp. */
COMP = 1 << 11,
COMP = 1 << 12,
/* Make the keyword table a global variable. */
GLOBAL = 1 << 12,
GLOBAL = 1 << 13,
/* Make the generated tables readonly (const). */
CONST = 1 << 13,
CONST = 1 << 14,
/* Generate K&R C code: no prototypes, no const. */
KRC = 1 << 14,
KRC = 1 << 15,
/* Generate C code: no prototypes, but const (user can #define it away). */
C = 1 << 15,
C = 1 << 16,
/* Generate ISO/ANSI C code: prototypes and const, but no class. */
ANSIC = 1 << 16,
ANSIC = 1 << 17,
/* Generate C++ code: prototypes, const, class, inline, enum. */
CPLUSPLUS = 1 << 17,
CPLUSPLUS = 1 << 18,
/* Use enum for constants. */
ENUM = 1 << 18,
ENUM = 1 << 19,
/* Generate #include statements. */
INCLUDE = 1 << 21,
INCLUDE = 1 << 20,
/* Assume 7-bit, not 8-bit, characters. */
SEVENBIT = 1 << 22
SEVENBIT = 1 << 21
};
/* This class denotes a set of key positions. */
@@ -115,8 +118,14 @@ public:
/* Constructors. */
Positions ();
Positions (int key1);
Positions (int key1, int key2);
Positions (int pos1);
Positions (int pos1, int pos2);
/* Copy constructor. */
Positions (const Positions& src);
/* Assignment operator. */
Positions& operator= (const Positions& src);
/* Accessors. */
int operator[] (unsigned int index) const;
@@ -130,6 +139,14 @@ public:
Returns true if there are no duplicates, false otherwise. */
bool sort ();
/* Set operations. Assumes the array is in reverse order. */
bool contains (int pos) const;
void add (int pos);
void remove (int pos);
/* Output in external syntax. */
void print () const;
private:
/* Number of positions. */
unsigned int _size;
@@ -246,13 +263,9 @@ public:
void set_delimiters (const char *delimiters);
/* Returns key positions.
Only to be called if !options[ALLCHARS]. */
Only to be used if !options[ALLCHARS]. */
const Positions& get_key_positions () const;
/* Returns total distinct key positions.
Only to be called if !options[ALLCHARS]. */
int get_max_keysig_size () const;
private:
/* Prints program usage to given stream. */
void short_usage (FILE * stream) const;

View File

@@ -32,18 +32,37 @@ Positions::Positions ()
}
INLINE
Positions::Positions (int key1)
Positions::Positions (int pos1)
: _size (1)
{
_positions[0] = key1;
_positions[0] = pos1;
}
INLINE
Positions::Positions (int key1, int key2)
Positions::Positions (int pos1, int pos2)
: _size (2)
{
_positions[0] = key1;
_positions[1] = key2;
_positions[0] = pos1;
_positions[1] = pos2;
}
/* Copy constructor. */
INLINE
Positions::Positions (const Positions& src)
: _size (src._size)
{
memcpy (_positions, src._positions, _size * sizeof (_positions[0]));
}
/* Assignment operator. */
INLINE Positions&
Positions::operator= (const Positions& src)
{
_size = src._size;
memcpy (_positions, src._positions, _size * sizeof (_positions[0]));
return *this;
}
/* Accessors. */
@@ -238,17 +257,9 @@ Options::get_delimiters () const
}
/* Returns key positions.
Only to be called if !options[ALLCHARS]. */
Only to be used if !options[ALLCHARS]. */
INLINE const Positions&
Options::get_key_positions () const
{
return _key_positions;
}
/* Returns total distinct key positions.
Only to be called if !options[ALLCHARS]. */
INLINE int
Options::get_max_keysig_size () const
{
return _key_positions.get_size();
}

View File

@@ -87,8 +87,9 @@ Output::Output (KeywordExt_List *head, const char *struct_decl,
unsigned int verbatim_declarations_lineno,
const char *verbatim_code, const char *verbatim_code_end,
unsigned int verbatim_code_lineno,
int total_keys, int total_duplicates, int max_key_len,
int min_key_len, int alpha_size, const int *occurrences,
int total_keys, int max_key_len, int min_key_len,
const Positions& positions, int total_duplicates,
int alpha_size, const int *occurrences,
const int *asso_values)
: _head (head), _struct_decl (struct_decl),
_struct_decl_lineno (struct_decl_lineno), _return_type (return_type),
@@ -99,9 +100,10 @@ Output::Output (KeywordExt_List *head, const char *struct_decl,
_verbatim_code (verbatim_code),
_verbatim_code_end (verbatim_code_end),
_verbatim_code_lineno (verbatim_code_lineno),
_total_keys (total_keys), _total_duplicates (total_duplicates),
_total_keys (total_keys),
_max_key_len (max_key_len), _min_key_len (min_key_len),
_alpha_size (alpha_size),
_key_positions (positions),
_total_duplicates (total_duplicates), _alpha_size (alpha_size),
_occurrences (occurrences), _asso_values (asso_values)
{
}
@@ -480,32 +482,33 @@ Output::output_hash_function () const
printf ("{\n");
/* First the asso_values array. */
{
printf (" static %s%s asso_values[] =\n"
" {",
const_readonly_array,
smallest_integral_type (_max_hash_value + 1));
if (option[ALLCHARS] || _key_positions.get_size() > 0)
{
printf (" static %s%s asso_values[] =\n"
" {",
const_readonly_array,
smallest_integral_type (_max_hash_value + 1));
const int columns = 10;
const int columns = 10;
/* Calculate maximum number of digits required for MAX_HASH_VALUE. */
int field_width = 2;
for (int trunc = _max_hash_value; (trunc /= 10) > 0;)
field_width++;
/* Calculate maximum number of digits required for MAX_HASH_VALUE. */
int field_width = 2;
for (int trunc = _max_hash_value; (trunc /= 10) > 0;)
field_width++;
for (int count = 0; count < _alpha_size; count++)
{
if (count > 0)
printf (",");
if ((count % columns) == 0)
printf ("\n ");
printf ("%*d", field_width,
_occurrences[count] ? _asso_values[count] : _max_hash_value + 1);
}
for (int count = 0; count < _alpha_size; count++)
{
if (count > 0)
printf (",");
if ((count % columns) == 0)
printf ("\n ");
printf ("%*d", field_width,
_occurrences[count] ? _asso_values[count] : _max_hash_value + 1);
}
printf ("\n"
" };\n");
}
printf ("\n"
" };\n");
}
if (option[ALLCHARS])
{
@@ -526,12 +529,18 @@ Output::output_hash_function () const
" }\n"
" return hval;\n");
}
else if (_key_positions.get_size() == 0)
{
/* Trivial case: No key positions at all. */
printf (" return %s;\n",
option[NOLENGTH] ? "0" : "len");
}
else
{
/* Iterate through the key positions. Remember that Positions::sort()
has sorted them in decreasing order, with Positions::LASTCHAR coming
last. */
PositionIterator iter (option.get_key_positions());
PositionIterator iter (_key_positions);
int key_pos;
/* Get the highest key position. */
@@ -547,9 +556,9 @@ Output::output_hash_function () const
printf (" return %s",
option[NOLENGTH] ? "" : "len + ");
if (option.get_key_positions().get_size() == 2
&& option.get_key_positions()[0] == 1
&& option.get_key_positions()[1] == Positions::LASTCHAR)
if (_key_positions.get_size() == 2
&& _key_positions[0] == 1
&& _key_positions[1] == Positions::LASTCHAR)
/* Optimize special case of "-k 1,$". */
printf ("asso_values[%sstr[len - 1]] + asso_values[%sstr[0]]",
char_to_index, char_to_index);
@@ -1492,6 +1501,12 @@ Output::output ()
printf (" code produced by gperf version %s */\n", version_string);
option.print_options ();
printf ("\n");
if (!option[POSITIONS])
{
printf ("/* Computed positions: -k'");
_key_positions.print();
printf ("' */\n");
}
if (_verbatim_declarations < _verbatim_declarations_end)
{

View File

@@ -27,6 +27,7 @@
#define output_h 1
#include "keyword-list.h"
#include "options.h"
/* OSF/1 cxx needs these forward declarations. */
struct Output_Constants;
@@ -48,8 +49,9 @@ public:
const char *verbatim_code_end,
unsigned int verbatim_code_lineno,
int total_keys,
int total_duplicates,
int max_key_len, int min_key_len,
const Positions& positions,
int total_duplicates,
int alpha_size,
const int *occurrences,
const int *asso_values);
@@ -113,12 +115,14 @@ private:
unsigned int const _verbatim_code_lineno;
/* Total number of keys, counting duplicates. */
int const _total_keys;
/* Total number of duplicate hash values. */
int const _total_duplicates;
/* Maximum length of the longest keyword. */
int const _max_key_len;
/* Minimum length of the shortest keyword. */
int const _min_key_len;
/* Key positions. Only to be used if !options[ALLCHARS]. */
Positions const _key_positions;
/* Total number of duplicate hash values. */
int const _total_duplicates;
/* Minimum hash value for all keywords. */
int _min_hash_value;
/* Maximum hash value for all keywords. */

View File

@@ -27,7 +27,7 @@
#include <stdlib.h> /* declares exit(), rand(), srand() */
#include <string.h> /* declares memset(), memcmp() */
#include <time.h> /* declares time() */
#include <limits.h> /* defines INT_MIN, INT_MAX */
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */
#include "options.h"
#include "hash-table.h"
@@ -35,6 +35,7 @@
Search::Search (KeywordExt_List *list)
: _head (list),
_key_positions (option.get_key_positions()),
_alpha_size (option[SEVENBIT] ? 128 : 256),
_occurrences (new int[_alpha_size]),
_asso_values (new int[_alpha_size]),
@@ -43,7 +44,7 @@ Search::Search (KeywordExt_List *list)
}
void
Search::prepare ()
Search::preprepare ()
{
KeywordExt_List *temp;
@@ -52,10 +53,6 @@ Search::prepare ()
for (temp = _head; temp; temp = temp->rest())
_total_keys++;
/* Initialize each keyword's _selchars array. */
for (temp = _head; temp; temp = temp->rest())
temp->first()->init_selchars();
/* Compute the minimum and maximum keyword length. */
_max_key_len = INT_MIN;
_min_key_len = INT_MAX;
@@ -78,6 +75,212 @@ Search::prepare ()
"len == 0 before calling the gperf generated lookup function.\n");
exit (1);
}
}
/* Initializes each keyword's _selchars array. */
void
Search::init_selchars (bool use_all_chars, const Positions& positions) const
{
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
temp->first()->init_selchars(use_all_chars, positions);
}
/* Deletes each keyword's _selchars array. */
void
Search::delete_selchars () const
{
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
temp->first()->delete_selchars();
}
/* Count the duplicate keywords that occur with a given set of positions. */
unsigned int
Search::count_duplicates (const Positions& positions) const
{
init_selchars (false, positions);
unsigned int count = 0;
Hash_Table representatives (_total_keys, option[NOLENGTH]);
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
if (representatives.insert (keyword))
count++;
}
delete_selchars ();
return count;
}
void
Search::find_positions ()
{
/* Determine good key positions. */
/* 1. Find positions that must occur in order to distinguish duplicates. */
Positions mandatory;
if (!option[DUP])
{
for (KeywordExt_List *l1 = _head; l1 && l1->rest(); l1 = l1->rest())
{
KeywordExt *keyword1 = l1->first();
for (KeywordExt_List *l2 = l1->rest(); l2; l2 = l2->rest())
{
KeywordExt *keyword2 = l2->first();
/* If keyword1 and keyword2 have the same length and differ
in just one position, and it is not the last character,
this position is mandatory. */
if (keyword1->_allchars_length == keyword2->_allchars_length)
{
int n = keyword1->_allchars_length;
int i;
for (i = 1; i < n; i++)
if (keyword1->_allchars[i-1] != keyword2->_allchars[i-1])
break;
if (i < n
&& memcmp (&keyword1->_allchars[i],
&keyword2->_allchars[i],
n - i)
== 0)
{
/* Position i is mandatory. */
if (!mandatory.contains (i))
mandatory.add (i);
}
}
}
}
}
/* 2. Add positions, as long as this decreases the duplicates count. */
int imax = (_max_key_len < Positions::MAX_KEY_POS
? _max_key_len : Positions::MAX_KEY_POS);
Positions current = mandatory;
unsigned int current_duplicates_count = count_duplicates (current);
for (;;)
{
Positions best;
unsigned int best_duplicates_count = UINT_MAX;
for (int i = imax; i >= 0; i--)
if (!current.contains (i))
{
Positions tryal = current;
tryal.add (i);
unsigned int try_duplicates_count = count_duplicates (tryal);
/* We prefer 'try' to 'best' if it produces less duplicates,
or if it produces the same number of duplicates but with
a more efficient hash function. */
if (try_duplicates_count < best_duplicates_count
|| (try_duplicates_count == best_duplicates_count && i > 0))
{
best = tryal;
best_duplicates_count = try_duplicates_count;
}
}
/* Stop adding positions when it gives no improvement. */
if (best_duplicates_count >= current_duplicates_count)
break;
current = best;
current_duplicates_count = best_duplicates_count;
}
/* 3. Remove positions, as long as this doesn't increase the duplicates
count. */
for (;;)
{
Positions best;
unsigned int best_duplicates_count = UINT_MAX;
for (int i = imax; i >= 0; i--)
if (current.contains (i) && !mandatory.contains (i))
{
Positions tryal = current;
tryal.remove (i);
unsigned int try_duplicates_count = count_duplicates (tryal);
/* We prefer 'try' to 'best' if it produces less duplicates,
or if it produces the same number of duplicates but with
a more efficient hash function. */
if (try_duplicates_count < best_duplicates_count
|| (try_duplicates_count == best_duplicates_count && i == 0))
{
best = tryal;
best_duplicates_count = try_duplicates_count;
}
}
/* Stop removing positions when it gives no improvement. */
if (best_duplicates_count > current_duplicates_count)
break;
current = best;
current_duplicates_count = best_duplicates_count;
}
/* 4. Replace two positions by one, as long as this doesn't increase the
duplicates count. */
for (;;)
{
Positions best;
unsigned int best_duplicates_count = UINT_MAX;
for (int i1 = imax; i1 >= 0; i1--)
if (current.contains (i1) && !mandatory.contains (i1))
for (int i2 = imax; i2 >= 0; i2--)
if (current.contains (i2) && !mandatory.contains (i2) && i2 != i1)
for (int i3 = imax; i3 >= 0; i3--)
if (!current.contains (i3))
{
Positions tryal = current;
tryal.remove (i1);
tryal.remove (i2);
tryal.add (i3);
unsigned int try_duplicates_count =
count_duplicates (tryal);
/* We prefer 'try' to 'best' if it produces less duplicates,
or if it produces the same number of duplicates but with
a more efficient hash function. */
if (try_duplicates_count < best_duplicates_count
|| (try_duplicates_count == best_duplicates_count
&& (i1 == 0 || i2 == 0 || i3 > 0)))
{
best = tryal;
best_duplicates_count = try_duplicates_count;
}
}
/* Stop removing positions when it gives no improvement. */
if (best_duplicates_count > current_duplicates_count)
break;
current = best;
current_duplicates_count = best_duplicates_count;
}
/* That's it. Hope it's good enough. */
_key_positions = current;
}
void
Search::prepare ()
{
KeywordExt_List *temp;
preprepare ();
if (!option[POSITIONS])
find_positions ();
/* Initialize each keyword's _selchars array. */
init_selchars (option[ALLCHARS], _key_positions);
/* Check for duplicates, i.e. keywords with the same _selchars array
(and - if !option[NOLENGTH] - also the same length).
@@ -140,8 +343,12 @@ Search::prepare ()
_total_duplicates);
else
{
fprintf (stderr, "%d input keys have identical hash values,\ntry different key positions or use option -D.\n",
fprintf (stderr, "%d input keys have identical hash values,\n",
_total_duplicates);
if (option[POSITIONS])
fprintf (stderr, "try different key positions or use option -D.\n");
else
fprintf (stderr, "use option -D.\n");
exit (1);
}
}
@@ -313,7 +520,7 @@ Search::max_key_length () const
int
Search::get_max_keysig_size () const
{
return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size ();
return option[ALLCHARS] ? _max_key_len : _key_positions.get_size();
}
/* ---------------------- Finding good asso_values[] ----------------------- */
@@ -758,9 +965,12 @@ Search::optimize ()
else /* Yow, big problems. we're outta here! */
{
fprintf (stderr,
"\nInternal error, duplicate value %d:\n"
"try options -D or -m or -r, or use new key positions.\n\n",
"\nInternal error, duplicate hash code value %d:\n",
hashcode);
if (option[POSITIONS])
fprintf (stderr, "try options -m or -D or -r, or use new key positions.\n\n");
else
fprintf (stderr, "try options -m or -D or -r.\n\n");
exit (1);
}
}

View File

@@ -27,6 +27,7 @@
#define search_h 1
#include "keyword-list.h"
#include "options.h"
#include "bool-array.h"
class Search
@@ -36,6 +37,18 @@ public:
~Search ();
void optimize ();
private:
void preprepare ();
/* Initializes each keyword's _selchars array. */
void init_selchars (bool use_all_chars, const Positions& positions) const;
/* Deletes each keyword's _selchars array. */
void delete_selchars () const;
/* Count the duplicate keywords that occur with a given set of positions. */
unsigned int count_duplicates (const Positions& positions) const;
void find_positions ();
void prepare ();
/* Computes the sum of occurrences of the _selchars of a keyword. */
@@ -90,16 +103,19 @@ public:
/* Total number of keywords, counting duplicates. */
int _total_keys;
/* Total number of duplicates that have been moved to _duplicate_link lists
(not counting their representatives which stay on the main list). */
int _total_duplicates;
/* Maximum length of the longest keyword. */
int _max_key_len;
/* Minimum length of the shortest keyword. */
int _min_key_len;
/* User-specified or computed key positions. */
Positions _key_positions;
/* Total number of duplicates that have been moved to _duplicate_link lists
(not counting their representatives which stay on the main list). */
int _total_duplicates;
/* Size of alphabet. */
int const _alpha_size;

View File

@@ -135,7 +135,7 @@ check-test:
diff $(srcdir)/java.exp java.out
$(GPERF) -n -k1-8 -l < $(srcdir)/modula2.gperf > modula2.out
diff $(srcdir)/modula2.exp modula2.out
$(GPERF) -D -p -t < $(srcdir)/c-parse.gperf > test-4.out
$(GPERF) -D -p -t -k1,'$$' < $(srcdir)/c-parse.gperf > test-4.out
diff $(srcdir)/test-4.exp test-4.out
$(GPERF) -g -o -j1 -t -p -N is_reserved_word < $(srcdir)/gpc.gperf > gpc.out
diff $(srcdir)/gpc.exp gpc.out

View File

@@ -1,5 +1,6 @@
/* C code produced by gperf version 2.7.2 */
/* Command-line: ../src/gperf -g -o -j1 -t -p -N is_reserved_word */
/* Computed positions: -k'1,$' */
/* ISO Pascal 7185 reserved words.
*

View File

@@ -1,5 +1,5 @@
/* C code produced by gperf version 2.7.2 */
/* Command-line: ../src/gperf -D -p -t */
/* Command-line: ../src/gperf -D -p -t -k'1,$' */
/* Command-line: gperf -L KR-C -F ', 0, 0' -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ c-parse.gperf */
struct resword { const char *name; short token; enum rid rid; };