mirror of
https://git.savannah.gnu.org/git/gperf.git
synced 2025-12-02 21:19:24 +00:00
When the option -k is not given, the default key positions are now computed
depending on the set of keywords.
This commit is contained in:
52
ChangeLog
52
ChangeLog
@@ -1,3 +1,55 @@
|
||||
2002-11-17 Bruno Haible <bruno@clisp.org>
|
||||
|
||||
* src/options.h (POSITIONS): New enum value.
|
||||
(Positions::Positions): New copy constructor.
|
||||
(Positions::operator=, Positions::contains, Positions::add,
|
||||
Positions::remove, Positions::print): New method declaration.
|
||||
(Options::get_max_keysig_size): Remove method.
|
||||
* src/options.icc (Positions::Positions): New copy constructor.
|
||||
(Positions::operator=): New method.
|
||||
(Options::get_max_keysig_size): Remove method.
|
||||
* src/options.cc (Options::Options): Initialize _key_positions
|
||||
trivially.
|
||||
(Options::parse_options): Option -k sets POSITIONS.
|
||||
(Positions::contains, Positions::add, Positions::remove,
|
||||
Positions::print): New methods.
|
||||
* src/hash-table.cc (Hash_Table::~Hash_Table): Compute the field
|
||||
width explicitly, instead of using Options::get_max_keysig_size.
|
||||
* src/keyword.h (KeywordExt::init_selchars): Add arguments
|
||||
use_all_chars, positions.
|
||||
(KeywordExt::delete_selchars): New declaration.
|
||||
* src/keyword.cc (KeywordExt::init_selchars): Add arguments
|
||||
use_all_chars, positions. Remove error message if there are no key
|
||||
positions.
|
||||
(KeywordExt::delete_selchars): New method.
|
||||
* src/search.h: Include options.h.
|
||||
(Search::preprepare, Search::init_selchars, Search::delete_selchars,
|
||||
Search::count_duplicates, Search::find_positions): New declarations.
|
||||
(Search::_key_positions): New field.
|
||||
* src/search.cc (Search::Search): Initialize _key_positions.
|
||||
(Search::preprepare, Search::init_selchars, Search::delete_selchars,
|
||||
Search::count_duplicates, Search::find_positions): New functions.
|
||||
(Search::prepare): Call preprepare and find_positions. Tweak error
|
||||
message.
|
||||
(Search::get_max_keysig_size): Use _key_positions instead of
|
||||
option.get_key_positions().
|
||||
(Search::optimize): Tweak error message.
|
||||
* src/output.h: Include options.h.
|
||||
(Output::Output): Add Positions argument.
|
||||
(Output::_key_positions): New field.
|
||||
* src/output.cc (Output::Output): Add Positions argument.
|
||||
(Output::output_hash_function): Omit the table if there are no
|
||||
positions at all. Use _key_positions instead of
|
||||
option.get_key_positions().
|
||||
(Output::output): Output the computed positions as a comment.
|
||||
* src/main.cc (main): Pass the Positions from Searcher to Output.
|
||||
* src/Makefile.in (SEARCH_H, OUTPUT_H): Include OPTIONS_H.
|
||||
* tests/Makefile.in (check-test): Pass key positions explicitly.
|
||||
* tests/gpc.exp: Update.
|
||||
* tests/test-4.exp: Update.
|
||||
* doc/gperf.texi (Algorithmic Details): Mention that -k is not needed
|
||||
usually.
|
||||
|
||||
2002-11-16 Bruno Haible <bruno@clisp.org>
|
||||
|
||||
* src/options.h (Options::get_slot_name): Renamed from
|
||||
|
||||
2
NEWS
2
NEWS
@@ -27,6 +27,8 @@ New in 2.8:
|
||||
%define word-array-name NAME
|
||||
%switch=COUNT
|
||||
%omit-struct-type
|
||||
* When the option -k is not given, the default key positions are now
|
||||
computed depending on the set of keywords.
|
||||
* If the input file is given by name, the output file will now contain
|
||||
#line directives referring to the input file.
|
||||
* Bug fixes.
|
||||
|
||||
@@ -985,6 +985,10 @@ with length less than the indicated byte positions work properly, since
|
||||
selected byte positions exceeding the keyword length are simply not
|
||||
referenced in the hash function.
|
||||
|
||||
This option is not normally needed since version 2.8 of @code{gperf};
|
||||
the default byte positions are computed depending on the keyword set,
|
||||
through a search that minimizes the number of byte positions.
|
||||
|
||||
@item -D
|
||||
@itemx --duplicates
|
||||
@cindex Duplicates
|
||||
|
||||
@@ -91,8 +91,8 @@ KEYWORD_LIST_H = keyword-list.h keyword-list.icc $(KEYWORD_H)
|
||||
INPUT_H = input.h $(KEYWORD_LIST_H)
|
||||
BOOL_ARRAY_H = bool-array.h bool-array.icc $(OPTIONS_H)
|
||||
HASH_TABLE_H = hash-table.h $(KEYWORD_H)
|
||||
SEARCH_H = search.h $(KEYWORD_LIST_H) $(BOOL_ARRAY_H)
|
||||
OUTPUT_H = output.h $(KEYWORD_LIST_H)
|
||||
SEARCH_H = search.h $(KEYWORD_LIST_H) $(OPTIONS_H) $(BOOL_ARRAY_H)
|
||||
OUTPUT_H = output.h $(KEYWORD_LIST_H) $(OPTIONS_H)
|
||||
|
||||
version.o : version.cc $(VERSION_H)
|
||||
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/version.cc
|
||||
|
||||
@@ -89,16 +89,11 @@ Hash_Table::~Hash_Table ()
|
||||
{
|
||||
int field_width;
|
||||
|
||||
if (option[ALLCHARS])
|
||||
{
|
||||
field_width = 0;
|
||||
for (int i = _size - 1; i >= 0; i--)
|
||||
if (_table[i])
|
||||
if (field_width < _table[i]->_selchars_length)
|
||||
field_width = _table[i]->_selchars_length;
|
||||
}
|
||||
else
|
||||
field_width = option.get_max_keysig_size ();
|
||||
field_width = 0;
|
||||
for (int i = _size - 1; i >= 0; i--)
|
||||
if (_table[i])
|
||||
if (field_width < _table[i]->_selchars_length)
|
||||
field_width = _table[i]->_selchars_length;
|
||||
|
||||
fprintf (stderr,
|
||||
"\ndumping the hash table\n"
|
||||
|
||||
@@ -47,7 +47,7 @@ static inline void sort_char_set (unsigned char *base, int len)
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize selchars and selchars_length.
|
||||
/* Initializes selchars and selchars_length.
|
||||
The hash function will be computed as
|
||||
asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ...
|
||||
We compute selchars as the multiset
|
||||
@@ -57,14 +57,15 @@ static inline void sort_char_set (unsigned char *base, int len)
|
||||
Furthermore we sort the selchars array, to ease detection of duplicates
|
||||
later.
|
||||
*/
|
||||
void KeywordExt::init_selchars ()
|
||||
void
|
||||
KeywordExt::init_selchars (bool use_all_chars, const Positions& positions)
|
||||
{
|
||||
const char *k = _allchars;
|
||||
unsigned char *key_set =
|
||||
new unsigned char[(option[ALLCHARS] ? _allchars_length : option.get_max_keysig_size ())];
|
||||
new unsigned char[(use_all_chars ? _allchars_length : positions.get_size ())];
|
||||
unsigned char *ptr = key_set;
|
||||
|
||||
if (option[ALLCHARS])
|
||||
if (use_all_chars)
|
||||
/* Use all the character positions in the KEY. */
|
||||
for (int i = _allchars_length; i > 0; k++, i--)
|
||||
{
|
||||
@@ -76,7 +77,7 @@ void KeywordExt::init_selchars ()
|
||||
{
|
||||
/* Iterate through the list of key_positions, initializing selchars
|
||||
(via ptr). */
|
||||
PositionIterator iter (option.get_key_positions ());
|
||||
PositionIterator iter (positions);
|
||||
|
||||
for (int i; (i = iter.next ()) != PositionIterator::EOS; )
|
||||
{
|
||||
@@ -91,15 +92,6 @@ void KeywordExt::init_selchars ()
|
||||
continue;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* Didn't get any hits and user doesn't want to consider the
|
||||
keylength, so there are essentially no usable hash positions! */
|
||||
if (ptr == key_set && option[NOLENGTH])
|
||||
{
|
||||
fprintf (stderr, "Can't hash keyword %.*s with chosen key positions.\n",
|
||||
_allchars_length, _allchars);
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort the KEY_SET items alphabetically. */
|
||||
@@ -109,6 +101,13 @@ void KeywordExt::init_selchars ()
|
||||
_selchars_length = ptr - key_set;
|
||||
}
|
||||
|
||||
/* Deletes selchars. */
|
||||
void
|
||||
KeywordExt::delete_selchars ()
|
||||
{
|
||||
delete[] _selchars;
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------- Keyword_Factory class ------------------------- */
|
||||
|
||||
|
||||
@@ -26,6 +26,9 @@
|
||||
#ifndef keyword_h
|
||||
#define keyword_h 1
|
||||
|
||||
/* Class defined in "options.h". */
|
||||
class Positions;
|
||||
|
||||
/* An instance of this class is a keyword, as specified in the input file. */
|
||||
|
||||
struct Keyword
|
||||
@@ -64,8 +67,10 @@ struct KeywordExt : public Keyword
|
||||
KeywordExt * _duplicate_link;
|
||||
|
||||
/* Methods depending on the keyposition list. */
|
||||
/* Initialize selchars and selchars_length. */
|
||||
void init_selchars ();
|
||||
/* Initializes selchars and selchars_length. */
|
||||
void init_selchars (bool use_all_chars, const Positions& positions);
|
||||
/* Deletes selchars. */
|
||||
void delete_selchars ();
|
||||
|
||||
/* Data members used by the algorithm. */
|
||||
int _occurrence; /* Frequency of key set occurrences. */
|
||||
|
||||
@@ -103,9 +103,10 @@ main (int argc, char *argv[])
|
||||
inputter._verbatim_code_end,
|
||||
inputter._verbatim_code_lineno,
|
||||
searcher._total_keys,
|
||||
searcher._total_duplicates,
|
||||
searcher._max_key_len,
|
||||
searcher._min_key_len,
|
||||
searcher._key_positions,
|
||||
searcher._total_duplicates,
|
||||
searcher._alpha_size,
|
||||
searcher._occurrences,
|
||||
searcher._asso_values);
|
||||
|
||||
142
src/options.cc
142
src/options.cc
@@ -442,7 +442,7 @@ Options::Options ()
|
||||
_hash_name (DEFAULT_HASH_NAME),
|
||||
_wordlist_name (DEFAULT_WORDLIST_NAME),
|
||||
_delimiters (DEFAULT_DELIMITERS),
|
||||
_key_positions (1, Positions::LASTCHAR)
|
||||
_key_positions ()
|
||||
{
|
||||
}
|
||||
|
||||
@@ -766,6 +766,7 @@ Options::parse_options (int argc, char *argv[])
|
||||
}
|
||||
case 'k': /* Sets key positions used for hash function. */
|
||||
{
|
||||
_option_word |= POSITIONS;
|
||||
const int BAD_VALUE = -2;
|
||||
const int EOS = PositionIterator::EOS;
|
||||
int value;
|
||||
@@ -782,7 +783,7 @@ Options::parse_options (int argc, char *argv[])
|
||||
{
|
||||
if (value == BAD_VALUE)
|
||||
{
|
||||
fprintf (stderr, "Invalid key value or range, use 1,2,3-%d,'$' or '*'.\n",
|
||||
fprintf (stderr, "Invalid position value or range, use 1,2,3-%d,'$' or '*'.\n",
|
||||
Positions::MAX_KEY_POS);
|
||||
short_usage (stderr);
|
||||
exit (1);
|
||||
@@ -793,7 +794,7 @@ Options::parse_options (int argc, char *argv[])
|
||||
Since all key positions are in the range
|
||||
1..Positions::MAX_KEY_POS or == Positions::LASTCHAR,
|
||||
there must be duplicates. */
|
||||
fprintf (stderr, "Duplicate keys selected\n");
|
||||
fprintf (stderr, "Duplicate key positions selected\n");
|
||||
short_usage (stderr);
|
||||
exit (1);
|
||||
}
|
||||
@@ -803,7 +804,7 @@ Options::parse_options (int argc, char *argv[])
|
||||
unsigned int total_keysig_size = key_pos - key_positions;
|
||||
if (total_keysig_size == 0)
|
||||
{
|
||||
fprintf (stderr, "No keys selected.\n");
|
||||
fprintf (stderr, "No key positions selected.\n");
|
||||
short_usage (stderr);
|
||||
exit (1);
|
||||
}
|
||||
@@ -814,7 +815,7 @@ Options::parse_options (int argc, char *argv[])
|
||||
when generating code. */
|
||||
if (! _key_positions.sort())
|
||||
{
|
||||
fprintf (stderr, "Duplicate keys selected\n");
|
||||
fprintf (stderr, "Duplicate key positions selected\n");
|
||||
short_usage (stderr);
|
||||
exit (1);
|
||||
}
|
||||
@@ -939,6 +940,137 @@ Options::parse_options (int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------- Class Positions ---------------------------- */
|
||||
|
||||
/* Set operations. Assumes the array is in reverse order. */
|
||||
|
||||
bool
|
||||
Positions::contains (int pos) const
|
||||
{
|
||||
unsigned int count = _size;
|
||||
const unsigned char *p = _positions + _size - 1;
|
||||
|
||||
for (; count > 0; p--, count--)
|
||||
{
|
||||
if (*p == pos)
|
||||
return true;
|
||||
if (*p > pos)
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
Positions::add (int pos)
|
||||
{
|
||||
unsigned int count = _size;
|
||||
|
||||
if (count == MAX_KEY_POS + 1)
|
||||
{
|
||||
fprintf (stderr, "Positions::add internal error: overflow\n");
|
||||
exit (1);
|
||||
}
|
||||
|
||||
unsigned char *p = _positions + _size - 1;
|
||||
|
||||
for (; count > 0; p--, count--)
|
||||
{
|
||||
if (*p == pos)
|
||||
{
|
||||
fprintf (stderr, "Positions::add internal error: duplicate\n");
|
||||
exit (1);
|
||||
}
|
||||
if (*p > pos)
|
||||
break;
|
||||
p[1] = p[0];
|
||||
}
|
||||
p[1] = pos;
|
||||
_size++;
|
||||
}
|
||||
|
||||
void
|
||||
Positions::remove (int pos)
|
||||
{
|
||||
unsigned int count = _size;
|
||||
if (count > 0)
|
||||
{
|
||||
unsigned char *p = _positions + _size - 1;
|
||||
|
||||
if (*p == pos)
|
||||
{
|
||||
_size--;
|
||||
return;
|
||||
}
|
||||
if (*p < pos)
|
||||
{
|
||||
unsigned char prev = *p;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
p--;
|
||||
count--;
|
||||
if (count == 0)
|
||||
break;
|
||||
if (*p == pos)
|
||||
{
|
||||
*p = prev;
|
||||
_size--;
|
||||
return;
|
||||
}
|
||||
if (*p > pos)
|
||||
break;
|
||||
unsigned char curr = *p;
|
||||
*p = prev;
|
||||
prev = curr;
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf (stderr, "Positions::remove internal error: not found\n");
|
||||
exit (1);
|
||||
}
|
||||
|
||||
/* Output in external syntax. */
|
||||
void
|
||||
Positions::print () const
|
||||
{
|
||||
bool first = true;
|
||||
bool seen_LASTCHAR = false;
|
||||
unsigned int count = _size;
|
||||
const unsigned char *p = _positions + _size - 1;
|
||||
|
||||
for (; count > 0; p--, count--)
|
||||
{
|
||||
if (*p == LASTCHAR)
|
||||
seen_LASTCHAR = true;
|
||||
else
|
||||
{
|
||||
if (!first)
|
||||
printf (",");
|
||||
printf ("%d", *p);
|
||||
if (count > 0 && p[-1] == *p + 1)
|
||||
{
|
||||
printf ("-");
|
||||
do
|
||||
{
|
||||
p--;
|
||||
count--;
|
||||
}
|
||||
while (count > 0 && p[-1] == *p + 1);
|
||||
printf ("%d", *p);
|
||||
}
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
if (seen_LASTCHAR)
|
||||
{
|
||||
if (!first)
|
||||
printf (",");
|
||||
printf ("$");
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef __OPTIMIZE__
|
||||
|
||||
#define INLINE /* not inline */
|
||||
|
||||
@@ -41,63 +41,66 @@ enum Option_Type
|
||||
/* Apply ordering heuristic to speed-up search time. */
|
||||
ORDER = 1 << 1,
|
||||
|
||||
/* Use the given key positions. */
|
||||
POSITIONS = 1 << 2,
|
||||
|
||||
/* Use all characters in hash function. */
|
||||
ALLCHARS = 1 << 2,
|
||||
ALLCHARS = 1 << 3,
|
||||
|
||||
/* Handle user-defined type structured keyword input. */
|
||||
TYPE = 1 << 3,
|
||||
TYPE = 1 << 4,
|
||||
|
||||
/* Randomly initialize the associated values table. */
|
||||
RANDOM = 1 << 4,
|
||||
RANDOM = 1 << 5,
|
||||
|
||||
/* Generate switch output to save space. */
|
||||
SWITCH = 1 << 5,
|
||||
SWITCH = 1 << 6,
|
||||
|
||||
/* Don't include keyword length in hash computations. */
|
||||
NOLENGTH = 1 << 6,
|
||||
NOLENGTH = 1 << 7,
|
||||
|
||||
/* Generate a length table for string comparison. */
|
||||
LENTABLE = 1 << 7,
|
||||
LENTABLE = 1 << 8,
|
||||
|
||||
/* Handle duplicate hash values for keywords. */
|
||||
DUP = 1 << 8,
|
||||
DUP = 1 << 9,
|
||||
|
||||
/* Generate the hash function "fast". */
|
||||
FAST = 1 << 9,
|
||||
FAST = 1 << 10,
|
||||
|
||||
/* Don't include user-defined type definition in output -- it's already
|
||||
defined elsewhere. */
|
||||
NOTYPE = 1 << 10,
|
||||
NOTYPE = 1 << 11,
|
||||
|
||||
/* Generate strncmp rather than strcmp. */
|
||||
COMP = 1 << 11,
|
||||
COMP = 1 << 12,
|
||||
|
||||
/* Make the keyword table a global variable. */
|
||||
GLOBAL = 1 << 12,
|
||||
GLOBAL = 1 << 13,
|
||||
|
||||
/* Make the generated tables readonly (const). */
|
||||
CONST = 1 << 13,
|
||||
CONST = 1 << 14,
|
||||
|
||||
/* Generate K&R C code: no prototypes, no const. */
|
||||
KRC = 1 << 14,
|
||||
KRC = 1 << 15,
|
||||
|
||||
/* Generate C code: no prototypes, but const (user can #define it away). */
|
||||
C = 1 << 15,
|
||||
C = 1 << 16,
|
||||
|
||||
/* Generate ISO/ANSI C code: prototypes and const, but no class. */
|
||||
ANSIC = 1 << 16,
|
||||
ANSIC = 1 << 17,
|
||||
|
||||
/* Generate C++ code: prototypes, const, class, inline, enum. */
|
||||
CPLUSPLUS = 1 << 17,
|
||||
CPLUSPLUS = 1 << 18,
|
||||
|
||||
/* Use enum for constants. */
|
||||
ENUM = 1 << 18,
|
||||
ENUM = 1 << 19,
|
||||
|
||||
/* Generate #include statements. */
|
||||
INCLUDE = 1 << 21,
|
||||
INCLUDE = 1 << 20,
|
||||
|
||||
/* Assume 7-bit, not 8-bit, characters. */
|
||||
SEVENBIT = 1 << 22
|
||||
SEVENBIT = 1 << 21
|
||||
};
|
||||
|
||||
/* This class denotes a set of key positions. */
|
||||
@@ -115,8 +118,14 @@ public:
|
||||
|
||||
/* Constructors. */
|
||||
Positions ();
|
||||
Positions (int key1);
|
||||
Positions (int key1, int key2);
|
||||
Positions (int pos1);
|
||||
Positions (int pos1, int pos2);
|
||||
|
||||
/* Copy constructor. */
|
||||
Positions (const Positions& src);
|
||||
|
||||
/* Assignment operator. */
|
||||
Positions& operator= (const Positions& src);
|
||||
|
||||
/* Accessors. */
|
||||
int operator[] (unsigned int index) const;
|
||||
@@ -130,6 +139,14 @@ public:
|
||||
Returns true if there are no duplicates, false otherwise. */
|
||||
bool sort ();
|
||||
|
||||
/* Set operations. Assumes the array is in reverse order. */
|
||||
bool contains (int pos) const;
|
||||
void add (int pos);
|
||||
void remove (int pos);
|
||||
|
||||
/* Output in external syntax. */
|
||||
void print () const;
|
||||
|
||||
private:
|
||||
/* Number of positions. */
|
||||
unsigned int _size;
|
||||
@@ -246,13 +263,9 @@ public:
|
||||
void set_delimiters (const char *delimiters);
|
||||
|
||||
/* Returns key positions.
|
||||
Only to be called if !options[ALLCHARS]. */
|
||||
Only to be used if !options[ALLCHARS]. */
|
||||
const Positions& get_key_positions () const;
|
||||
|
||||
/* Returns total distinct key positions.
|
||||
Only to be called if !options[ALLCHARS]. */
|
||||
int get_max_keysig_size () const;
|
||||
|
||||
private:
|
||||
/* Prints program usage to given stream. */
|
||||
void short_usage (FILE * stream) const;
|
||||
|
||||
@@ -32,18 +32,37 @@ Positions::Positions ()
|
||||
}
|
||||
|
||||
INLINE
|
||||
Positions::Positions (int key1)
|
||||
Positions::Positions (int pos1)
|
||||
: _size (1)
|
||||
{
|
||||
_positions[0] = key1;
|
||||
_positions[0] = pos1;
|
||||
}
|
||||
|
||||
INLINE
|
||||
Positions::Positions (int key1, int key2)
|
||||
Positions::Positions (int pos1, int pos2)
|
||||
: _size (2)
|
||||
{
|
||||
_positions[0] = key1;
|
||||
_positions[1] = key2;
|
||||
_positions[0] = pos1;
|
||||
_positions[1] = pos2;
|
||||
}
|
||||
|
||||
/* Copy constructor. */
|
||||
|
||||
INLINE
|
||||
Positions::Positions (const Positions& src)
|
||||
: _size (src._size)
|
||||
{
|
||||
memcpy (_positions, src._positions, _size * sizeof (_positions[0]));
|
||||
}
|
||||
|
||||
/* Assignment operator. */
|
||||
|
||||
INLINE Positions&
|
||||
Positions::operator= (const Positions& src)
|
||||
{
|
||||
_size = src._size;
|
||||
memcpy (_positions, src._positions, _size * sizeof (_positions[0]));
|
||||
return *this;
|
||||
}
|
||||
|
||||
/* Accessors. */
|
||||
@@ -238,17 +257,9 @@ Options::get_delimiters () const
|
||||
}
|
||||
|
||||
/* Returns key positions.
|
||||
Only to be called if !options[ALLCHARS]. */
|
||||
Only to be used if !options[ALLCHARS]. */
|
||||
INLINE const Positions&
|
||||
Options::get_key_positions () const
|
||||
{
|
||||
return _key_positions;
|
||||
}
|
||||
|
||||
/* Returns total distinct key positions.
|
||||
Only to be called if !options[ALLCHARS]. */
|
||||
INLINE int
|
||||
Options::get_max_keysig_size () const
|
||||
{
|
||||
return _key_positions.get_size();
|
||||
}
|
||||
|
||||
@@ -87,8 +87,9 @@ Output::Output (KeywordExt_List *head, const char *struct_decl,
|
||||
unsigned int verbatim_declarations_lineno,
|
||||
const char *verbatim_code, const char *verbatim_code_end,
|
||||
unsigned int verbatim_code_lineno,
|
||||
int total_keys, int total_duplicates, int max_key_len,
|
||||
int min_key_len, int alpha_size, const int *occurrences,
|
||||
int total_keys, int max_key_len, int min_key_len,
|
||||
const Positions& positions, int total_duplicates,
|
||||
int alpha_size, const int *occurrences,
|
||||
const int *asso_values)
|
||||
: _head (head), _struct_decl (struct_decl),
|
||||
_struct_decl_lineno (struct_decl_lineno), _return_type (return_type),
|
||||
@@ -99,9 +100,10 @@ Output::Output (KeywordExt_List *head, const char *struct_decl,
|
||||
_verbatim_code (verbatim_code),
|
||||
_verbatim_code_end (verbatim_code_end),
|
||||
_verbatim_code_lineno (verbatim_code_lineno),
|
||||
_total_keys (total_keys), _total_duplicates (total_duplicates),
|
||||
_total_keys (total_keys),
|
||||
_max_key_len (max_key_len), _min_key_len (min_key_len),
|
||||
_alpha_size (alpha_size),
|
||||
_key_positions (positions),
|
||||
_total_duplicates (total_duplicates), _alpha_size (alpha_size),
|
||||
_occurrences (occurrences), _asso_values (asso_values)
|
||||
{
|
||||
}
|
||||
@@ -480,32 +482,33 @@ Output::output_hash_function () const
|
||||
printf ("{\n");
|
||||
|
||||
/* First the asso_values array. */
|
||||
{
|
||||
printf (" static %s%s asso_values[] =\n"
|
||||
" {",
|
||||
const_readonly_array,
|
||||
smallest_integral_type (_max_hash_value + 1));
|
||||
if (option[ALLCHARS] || _key_positions.get_size() > 0)
|
||||
{
|
||||
printf (" static %s%s asso_values[] =\n"
|
||||
" {",
|
||||
const_readonly_array,
|
||||
smallest_integral_type (_max_hash_value + 1));
|
||||
|
||||
const int columns = 10;
|
||||
const int columns = 10;
|
||||
|
||||
/* Calculate maximum number of digits required for MAX_HASH_VALUE. */
|
||||
int field_width = 2;
|
||||
for (int trunc = _max_hash_value; (trunc /= 10) > 0;)
|
||||
field_width++;
|
||||
/* Calculate maximum number of digits required for MAX_HASH_VALUE. */
|
||||
int field_width = 2;
|
||||
for (int trunc = _max_hash_value; (trunc /= 10) > 0;)
|
||||
field_width++;
|
||||
|
||||
for (int count = 0; count < _alpha_size; count++)
|
||||
{
|
||||
if (count > 0)
|
||||
printf (",");
|
||||
if ((count % columns) == 0)
|
||||
printf ("\n ");
|
||||
printf ("%*d", field_width,
|
||||
_occurrences[count] ? _asso_values[count] : _max_hash_value + 1);
|
||||
}
|
||||
for (int count = 0; count < _alpha_size; count++)
|
||||
{
|
||||
if (count > 0)
|
||||
printf (",");
|
||||
if ((count % columns) == 0)
|
||||
printf ("\n ");
|
||||
printf ("%*d", field_width,
|
||||
_occurrences[count] ? _asso_values[count] : _max_hash_value + 1);
|
||||
}
|
||||
|
||||
printf ("\n"
|
||||
" };\n");
|
||||
}
|
||||
printf ("\n"
|
||||
" };\n");
|
||||
}
|
||||
|
||||
if (option[ALLCHARS])
|
||||
{
|
||||
@@ -526,12 +529,18 @@ Output::output_hash_function () const
|
||||
" }\n"
|
||||
" return hval;\n");
|
||||
}
|
||||
else if (_key_positions.get_size() == 0)
|
||||
{
|
||||
/* Trivial case: No key positions at all. */
|
||||
printf (" return %s;\n",
|
||||
option[NOLENGTH] ? "0" : "len");
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Iterate through the key positions. Remember that Positions::sort()
|
||||
has sorted them in decreasing order, with Positions::LASTCHAR coming
|
||||
last. */
|
||||
PositionIterator iter (option.get_key_positions());
|
||||
PositionIterator iter (_key_positions);
|
||||
int key_pos;
|
||||
|
||||
/* Get the highest key position. */
|
||||
@@ -547,9 +556,9 @@ Output::output_hash_function () const
|
||||
printf (" return %s",
|
||||
option[NOLENGTH] ? "" : "len + ");
|
||||
|
||||
if (option.get_key_positions().get_size() == 2
|
||||
&& option.get_key_positions()[0] == 1
|
||||
&& option.get_key_positions()[1] == Positions::LASTCHAR)
|
||||
if (_key_positions.get_size() == 2
|
||||
&& _key_positions[0] == 1
|
||||
&& _key_positions[1] == Positions::LASTCHAR)
|
||||
/* Optimize special case of "-k 1,$". */
|
||||
printf ("asso_values[%sstr[len - 1]] + asso_values[%sstr[0]]",
|
||||
char_to_index, char_to_index);
|
||||
@@ -1492,6 +1501,12 @@ Output::output ()
|
||||
printf (" code produced by gperf version %s */\n", version_string);
|
||||
option.print_options ();
|
||||
printf ("\n");
|
||||
if (!option[POSITIONS])
|
||||
{
|
||||
printf ("/* Computed positions: -k'");
|
||||
_key_positions.print();
|
||||
printf ("' */\n");
|
||||
}
|
||||
|
||||
if (_verbatim_declarations < _verbatim_declarations_end)
|
||||
{
|
||||
|
||||
10
src/output.h
10
src/output.h
@@ -27,6 +27,7 @@
|
||||
#define output_h 1
|
||||
|
||||
#include "keyword-list.h"
|
||||
#include "options.h"
|
||||
|
||||
/* OSF/1 cxx needs these forward declarations. */
|
||||
struct Output_Constants;
|
||||
@@ -48,8 +49,9 @@ public:
|
||||
const char *verbatim_code_end,
|
||||
unsigned int verbatim_code_lineno,
|
||||
int total_keys,
|
||||
int total_duplicates,
|
||||
int max_key_len, int min_key_len,
|
||||
const Positions& positions,
|
||||
int total_duplicates,
|
||||
int alpha_size,
|
||||
const int *occurrences,
|
||||
const int *asso_values);
|
||||
@@ -113,12 +115,14 @@ private:
|
||||
unsigned int const _verbatim_code_lineno;
|
||||
/* Total number of keys, counting duplicates. */
|
||||
int const _total_keys;
|
||||
/* Total number of duplicate hash values. */
|
||||
int const _total_duplicates;
|
||||
/* Maximum length of the longest keyword. */
|
||||
int const _max_key_len;
|
||||
/* Minimum length of the shortest keyword. */
|
||||
int const _min_key_len;
|
||||
/* Key positions. Only to be used if !options[ALLCHARS]. */
|
||||
Positions const _key_positions;
|
||||
/* Total number of duplicate hash values. */
|
||||
int const _total_duplicates;
|
||||
/* Minimum hash value for all keywords. */
|
||||
int _min_hash_value;
|
||||
/* Maximum hash value for all keywords. */
|
||||
|
||||
230
src/search.cc
230
src/search.cc
@@ -27,7 +27,7 @@
|
||||
#include <stdlib.h> /* declares exit(), rand(), srand() */
|
||||
#include <string.h> /* declares memset(), memcmp() */
|
||||
#include <time.h> /* declares time() */
|
||||
#include <limits.h> /* defines INT_MIN, INT_MAX */
|
||||
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */
|
||||
#include "options.h"
|
||||
#include "hash-table.h"
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
|
||||
Search::Search (KeywordExt_List *list)
|
||||
: _head (list),
|
||||
_key_positions (option.get_key_positions()),
|
||||
_alpha_size (option[SEVENBIT] ? 128 : 256),
|
||||
_occurrences (new int[_alpha_size]),
|
||||
_asso_values (new int[_alpha_size]),
|
||||
@@ -43,7 +44,7 @@ Search::Search (KeywordExt_List *list)
|
||||
}
|
||||
|
||||
void
|
||||
Search::prepare ()
|
||||
Search::preprepare ()
|
||||
{
|
||||
KeywordExt_List *temp;
|
||||
|
||||
@@ -52,10 +53,6 @@ Search::prepare ()
|
||||
for (temp = _head; temp; temp = temp->rest())
|
||||
_total_keys++;
|
||||
|
||||
/* Initialize each keyword's _selchars array. */
|
||||
for (temp = _head; temp; temp = temp->rest())
|
||||
temp->first()->init_selchars();
|
||||
|
||||
/* Compute the minimum and maximum keyword length. */
|
||||
_max_key_len = INT_MIN;
|
||||
_min_key_len = INT_MAX;
|
||||
@@ -78,6 +75,212 @@ Search::prepare ()
|
||||
"len == 0 before calling the gperf generated lookup function.\n");
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Initializes each keyword's _selchars array. */
|
||||
void
|
||||
Search::init_selchars (bool use_all_chars, const Positions& positions) const
|
||||
{
|
||||
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||
temp->first()->init_selchars(use_all_chars, positions);
|
||||
}
|
||||
|
||||
/* Deletes each keyword's _selchars array. */
|
||||
void
|
||||
Search::delete_selchars () const
|
||||
{
|
||||
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||
temp->first()->delete_selchars();
|
||||
}
|
||||
|
||||
/* Count the duplicate keywords that occur with a given set of positions. */
|
||||
unsigned int
|
||||
Search::count_duplicates (const Positions& positions) const
|
||||
{
|
||||
init_selchars (false, positions);
|
||||
|
||||
unsigned int count = 0;
|
||||
Hash_Table representatives (_total_keys, option[NOLENGTH]);
|
||||
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||
{
|
||||
KeywordExt *keyword = temp->first();
|
||||
if (representatives.insert (keyword))
|
||||
count++;
|
||||
}
|
||||
|
||||
delete_selchars ();
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
void
|
||||
Search::find_positions ()
|
||||
{
|
||||
/* Determine good key positions. */
|
||||
|
||||
/* 1. Find positions that must occur in order to distinguish duplicates. */
|
||||
Positions mandatory;
|
||||
|
||||
if (!option[DUP])
|
||||
{
|
||||
for (KeywordExt_List *l1 = _head; l1 && l1->rest(); l1 = l1->rest())
|
||||
{
|
||||
KeywordExt *keyword1 = l1->first();
|
||||
for (KeywordExt_List *l2 = l1->rest(); l2; l2 = l2->rest())
|
||||
{
|
||||
KeywordExt *keyword2 = l2->first();
|
||||
|
||||
/* If keyword1 and keyword2 have the same length and differ
|
||||
in just one position, and it is not the last character,
|
||||
this position is mandatory. */
|
||||
if (keyword1->_allchars_length == keyword2->_allchars_length)
|
||||
{
|
||||
int n = keyword1->_allchars_length;
|
||||
int i;
|
||||
for (i = 1; i < n; i++)
|
||||
if (keyword1->_allchars[i-1] != keyword2->_allchars[i-1])
|
||||
break;
|
||||
if (i < n
|
||||
&& memcmp (&keyword1->_allchars[i],
|
||||
&keyword2->_allchars[i],
|
||||
n - i)
|
||||
== 0)
|
||||
{
|
||||
/* Position i is mandatory. */
|
||||
if (!mandatory.contains (i))
|
||||
mandatory.add (i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* 2. Add positions, as long as this decreases the duplicates count. */
|
||||
int imax = (_max_key_len < Positions::MAX_KEY_POS
|
||||
? _max_key_len : Positions::MAX_KEY_POS);
|
||||
Positions current = mandatory;
|
||||
unsigned int current_duplicates_count = count_duplicates (current);
|
||||
for (;;)
|
||||
{
|
||||
Positions best;
|
||||
unsigned int best_duplicates_count = UINT_MAX;
|
||||
|
||||
for (int i = imax; i >= 0; i--)
|
||||
if (!current.contains (i))
|
||||
{
|
||||
Positions tryal = current;
|
||||
tryal.add (i);
|
||||
unsigned int try_duplicates_count = count_duplicates (tryal);
|
||||
|
||||
/* We prefer 'try' to 'best' if it produces less duplicates,
|
||||
or if it produces the same number of duplicates but with
|
||||
a more efficient hash function. */
|
||||
if (try_duplicates_count < best_duplicates_count
|
||||
|| (try_duplicates_count == best_duplicates_count && i > 0))
|
||||
{
|
||||
best = tryal;
|
||||
best_duplicates_count = try_duplicates_count;
|
||||
}
|
||||
}
|
||||
|
||||
/* Stop adding positions when it gives no improvement. */
|
||||
if (best_duplicates_count >= current_duplicates_count)
|
||||
break;
|
||||
|
||||
current = best;
|
||||
current_duplicates_count = best_duplicates_count;
|
||||
}
|
||||
|
||||
/* 3. Remove positions, as long as this doesn't increase the duplicates
|
||||
count. */
|
||||
for (;;)
|
||||
{
|
||||
Positions best;
|
||||
unsigned int best_duplicates_count = UINT_MAX;
|
||||
|
||||
for (int i = imax; i >= 0; i--)
|
||||
if (current.contains (i) && !mandatory.contains (i))
|
||||
{
|
||||
Positions tryal = current;
|
||||
tryal.remove (i);
|
||||
unsigned int try_duplicates_count = count_duplicates (tryal);
|
||||
|
||||
/* We prefer 'try' to 'best' if it produces less duplicates,
|
||||
or if it produces the same number of duplicates but with
|
||||
a more efficient hash function. */
|
||||
if (try_duplicates_count < best_duplicates_count
|
||||
|| (try_duplicates_count == best_duplicates_count && i == 0))
|
||||
{
|
||||
best = tryal;
|
||||
best_duplicates_count = try_duplicates_count;
|
||||
}
|
||||
}
|
||||
|
||||
/* Stop removing positions when it gives no improvement. */
|
||||
if (best_duplicates_count > current_duplicates_count)
|
||||
break;
|
||||
|
||||
current = best;
|
||||
current_duplicates_count = best_duplicates_count;
|
||||
}
|
||||
|
||||
/* 4. Replace two positions by one, as long as this doesn't increase the
|
||||
duplicates count. */
|
||||
for (;;)
|
||||
{
|
||||
Positions best;
|
||||
unsigned int best_duplicates_count = UINT_MAX;
|
||||
|
||||
for (int i1 = imax; i1 >= 0; i1--)
|
||||
if (current.contains (i1) && !mandatory.contains (i1))
|
||||
for (int i2 = imax; i2 >= 0; i2--)
|
||||
if (current.contains (i2) && !mandatory.contains (i2) && i2 != i1)
|
||||
for (int i3 = imax; i3 >= 0; i3--)
|
||||
if (!current.contains (i3))
|
||||
{
|
||||
Positions tryal = current;
|
||||
tryal.remove (i1);
|
||||
tryal.remove (i2);
|
||||
tryal.add (i3);
|
||||
unsigned int try_duplicates_count =
|
||||
count_duplicates (tryal);
|
||||
|
||||
/* We prefer 'tryal' to 'best' if it produces fewer duplicates,
|
||||
or if it produces the same number of duplicates but with
|
||||
a more efficient hash function. */
|
||||
if (try_duplicates_count < best_duplicates_count
|
||||
|| (try_duplicates_count == best_duplicates_count
|
||||
&& (i1 == 0 || i2 == 0 || i3 > 0)))
|
||||
{
|
||||
best = tryal;
|
||||
best_duplicates_count = try_duplicates_count;
|
||||
}
|
||||
}
|
||||
|
||||
/* Stop replacing positions when it gives no improvement. */
|
||||
if (best_duplicates_count > current_duplicates_count)
|
||||
break;
|
||||
|
||||
current = best;
|
||||
current_duplicates_count = best_duplicates_count;
|
||||
}
|
||||
|
||||
/* That's it. Hope it's good enough. */
|
||||
_key_positions = current;
|
||||
}
|
||||
|
||||
void
|
||||
Search::prepare ()
|
||||
{
|
||||
KeywordExt_List *temp;
|
||||
|
||||
preprepare ();
|
||||
|
||||
if (!option[POSITIONS])
|
||||
find_positions ();
|
||||
|
||||
/* Initialize each keyword's _selchars array. */
|
||||
init_selchars (option[ALLCHARS], _key_positions);
|
||||
|
||||
/* Check for duplicates, i.e. keywords with the same _selchars array
|
||||
(and - if !option[NOLENGTH] - also the same length).
|
||||
@@ -140,8 +343,12 @@ Search::prepare ()
|
||||
_total_duplicates);
|
||||
else
|
||||
{
|
||||
fprintf (stderr, "%d input keys have identical hash values,\ntry different key positions or use option -D.\n",
|
||||
fprintf (stderr, "%d input keys have identical hash values,\n",
|
||||
_total_duplicates);
|
||||
if (option[POSITIONS])
|
||||
fprintf (stderr, "try different key positions or use option -D.\n");
|
||||
else
|
||||
fprintf (stderr, "use option -D.\n");
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
@@ -313,7 +520,7 @@ Search::max_key_length () const
|
||||
int
|
||||
Search::get_max_keysig_size () const
|
||||
{
|
||||
return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size ();
|
||||
return option[ALLCHARS] ? _max_key_len : _key_positions.get_size();
|
||||
}
|
||||
|
||||
/* ---------------------- Finding good asso_values[] ----------------------- */
|
||||
@@ -758,9 +965,12 @@ Search::optimize ()
|
||||
else /* Yow, big problems. we're outta here! */
|
||||
{
|
||||
fprintf (stderr,
|
||||
"\nInternal error, duplicate value %d:\n"
|
||||
"try options -D or -m or -r, or use new key positions.\n\n",
|
||||
"\nInternal error, duplicate hash code value %d:\n",
|
||||
hashcode);
|
||||
if (option[POSITIONS])
|
||||
fprintf (stderr, "try options -m or -D or -r, or use new key positions.\n\n");
|
||||
else
|
||||
fprintf (stderr, "try options -m or -D or -r.\n\n");
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
|
||||
24
src/search.h
24
src/search.h
@@ -27,6 +27,7 @@
|
||||
#define search_h 1
|
||||
|
||||
#include "keyword-list.h"
|
||||
#include "options.h"
|
||||
#include "bool-array.h"
|
||||
|
||||
class Search
|
||||
@@ -36,6 +37,18 @@ public:
|
||||
~Search ();
|
||||
void optimize ();
|
||||
private:
|
||||
void preprepare ();
|
||||
|
||||
/* Initializes each keyword's _selchars array. */
|
||||
void init_selchars (bool use_all_chars, const Positions& positions) const;
|
||||
/* Deletes each keyword's _selchars array. */
|
||||
void delete_selchars () const;
|
||||
|
||||
/* Count the duplicate keywords that occur with a given set of positions. */
|
||||
unsigned int count_duplicates (const Positions& positions) const;
|
||||
|
||||
void find_positions ();
|
||||
|
||||
void prepare ();
|
||||
|
||||
/* Computes the sum of occurrences of the _selchars of a keyword. */
|
||||
@@ -90,16 +103,19 @@ public:
|
||||
/* Total number of keywords, counting duplicates. */
|
||||
int _total_keys;
|
||||
|
||||
/* Total number of duplicates that have been moved to _duplicate_link lists
|
||||
(not counting their representatives which stay on the main list). */
|
||||
int _total_duplicates;
|
||||
|
||||
/* Length of the longest keyword. */
|
||||
int _max_key_len;
|
||||
|
||||
/* Length of the shortest keyword. */
|
||||
int _min_key_len;
|
||||
|
||||
/* User-specified or computed key positions. */
|
||||
Positions _key_positions;
|
||||
|
||||
/* Total number of duplicates that have been moved to _duplicate_link lists
|
||||
(not counting their representatives which stay on the main list). */
|
||||
int _total_duplicates;
|
||||
|
||||
/* Size of alphabet. */
|
||||
int const _alpha_size;
|
||||
|
||||
|
||||
@@ -135,7 +135,7 @@ check-test:
|
||||
diff $(srcdir)/java.exp java.out
|
||||
$(GPERF) -n -k1-8 -l < $(srcdir)/modula2.gperf > modula2.out
|
||||
diff $(srcdir)/modula2.exp modula2.out
|
||||
$(GPERF) -D -p -t < $(srcdir)/c-parse.gperf > test-4.out
|
||||
$(GPERF) -D -p -t -k1,'$$' < $(srcdir)/c-parse.gperf > test-4.out
|
||||
diff $(srcdir)/test-4.exp test-4.out
|
||||
$(GPERF) -g -o -j1 -t -p -N is_reserved_word < $(srcdir)/gpc.gperf > gpc.out
|
||||
diff $(srcdir)/gpc.exp gpc.out
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/* C code produced by gperf version 2.7.2 */
|
||||
/* Command-line: ../src/gperf -g -o -j1 -t -p -N is_reserved_word */
|
||||
/* Computed positions: -k'1,$' */
|
||||
|
||||
/* ISO Pascal 7185 reserved words.
|
||||
*
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/* C code produced by gperf version 2.7.2 */
|
||||
/* Command-line: ../src/gperf -D -p -t */
|
||||
/* Command-line: ../src/gperf -D -p -t -k'1,$' */
|
||||
|
||||
/* Command-line: gperf -L KR-C -F ', 0, 0' -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ c-parse.gperf */
|
||||
struct resword { const char *name; short token; enum rid rid; };
|
||||
|
||||
Reference in New Issue
Block a user