1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Move the handling of ALLCHARS (-k'*') into the Positions class.

This commit is contained in:
Bruno Haible
2003-04-07 09:50:11 +00:00
parent ec3d1127fa
commit 2535f34494
14 changed files with 398 additions and 241 deletions

View File

@@ -1,3 +1,55 @@
2002-12-12 Bruno Haible <bruno@clisp.org>
* src/positions.h (Positions::is_useall, Positions::set_useall,
Positions::iterator, Positions::reviterator): New method declarations.
(Positions::_useall): New field.
(PositionIterator): Make constructor private. Add a constructor and a
copy constructor.
(PositionIterator::remaining): New declaration.
(PositionReverseIterator): Make constructor private. Add a constructor
and a copy constructor.
(PositionReverseIterator::remaining): New declaration.
(PositionReverseIterator::_minindex): New field.
* src/positions.icc (Positions::Positions): Initialize _useall.
(Positions::operator=): Likewise.
(Positions::is_useall, Positions::set_useall): New methods.
(Positions::sort): Do nothing if _useall is set.
(Positions::iterator, Positions::reviterator): New methods.
(PositionIterator::PositionIterator): New constructor.
(PositionIterator::remaining): New method.
(PositionReverseIterator::PositionReverseIterator): New constructor.
(PositionReverseIterator::next): Use _minindex as bound.
(PositionReverseIterator::remaining): New method.
* src/positions.cc (Positions::add, Positions::remove): Reset the
useall flag.
(Positions::print): Handle the useall case.
* src/options.h (ALLCHARS): Remove.
* src/options.cc (Options::~Options): Update.
(Options::parse_options): Use Positions::set_useall().
* src/keyword.h (KeywordExt::init_selchars_tuple,
KeywordExt::init_selchars_multiset, KeywordExt::init_selchars_low):
Remove use_all_chars argument.
* src/keyword.cc (KeywordExt::init_selchars_low): Remove use_all_chars
argument. Tell the position iterator to stop at _allchars_length.
Remove special case code for -k'*'.
(KeywordExt::init_selchars_tuple, KeywordExt::init_selchars_multiset):
Remove use_all_chars argument.
* src/search.h (Search::init_selchars_tuple): Remove use_all_chars
argument.
(Search::init_selchars_multiset): Likewise.
* src/search.cc (Search::init_selchars_tuple): Remove use_all_chars
argument.
(Search::count_duplicates_tuple, Search::find_positions): Update.
(Search::compute_alpha_unify): Remove special case code for -k'*'.
(Search::init_selchars_multiset): Remove use_all_chars argument.
(Search::count_duplicates_multiset): Update.
(Search::find_alpha_inc): Remove special case code for -k'*'.
(Search::prepare): Update.
(Search::get_max_keysig_size): Update.
* src/output.cc (Output::output_hash_function): Remove special case
code for -k'*'.
* tests/chill.exp: Regenerated.
2002-12-11 Bruno Haible <bruno@clisp.org> 2002-12-11 Bruno Haible <bruno@clisp.org>
Change the positions to be 0-based, instead of 1-based. Change the positions to be 0-based, instead of 1-based.

View File

@@ -48,64 +48,62 @@ static inline void sort_char_set (unsigned int *base, int len)
} }
/* Initializes selchars and selchars_length. /* Initializes selchars and selchars_length.
The hash function will be computed as
asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ... General idea:
We compute selchars as the multiset The hash function will be computed as
{ allchars[key_pos[0]], allchars[key_pos[1]], ... } asso_values[allchars[key_pos[0]]] +
so that the hash function becomes asso_values[allchars[key_pos[1]]] + ...
asso_values[selchars[0]] + asso_values[selchars[1]] + ... We compute selchars as the multiset
{ allchars[key_pos[0]], allchars[key_pos[1]], ... }
so that the hash function becomes
asso_values[selchars[0]] + asso_values[selchars[1]] + ...
Furthermore we sort the selchars array, to ease detection of duplicates Furthermore we sort the selchars array, to ease detection of duplicates
later. later.
More in detail: The arguments alpha_unify (used for case-insensitive
hash functions) and alpha_inc (used to disambiguate permutations)
apply slight modifications. The hash function will be computed as
sum (j=0,1,...: k = key_pos[j]:
asso_values[alpha_unify[allchars[k]+alpha_inc[k]]])
+ (allchars_length if !option[NOLENGTH], 0 otherwise).
We compute selchars as the multiset
{ alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] }
so that the hash function becomes
asso_values[selchars[0]] + asso_values[selchars[1]] + ...
+ (allchars_length if !option[NOLENGTH], 0 otherwise).
*/ */
unsigned int * unsigned int *
KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) KeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
{ {
const char *k = _allchars; /* Iterate through the list of positions, initializing selchars
unsigned int *key_set = (via ptr). */
new unsigned int[(use_all_chars ? _allchars_length : positions.get_size ())]; PositionIterator iter = positions.iterator(_allchars_length);
unsigned int *key_set = new unsigned int[iter.remaining()];
unsigned int *ptr = key_set; unsigned int *ptr = key_set;
if (use_all_chars) for (int i; (i = iter.next ()) != PositionIterator::EOS; )
/* Use all the character positions in the KEY. */
for (int i = _allchars_length; i > 0; k++, i--)
{
unsigned int c = static_cast<unsigned char>(*k);
if (alpha_inc)
c += alpha_inc[k-_allchars];
if (alpha_unify)
c = alpha_unify[c];
*ptr = c;
ptr++;
}
else
/* Only use those character positions specified by the user. */
{ {
/* Iterate through the list of key_positions, initializing selchars unsigned int c;
(via ptr). */ if (i == Positions::LASTCHAR)
PositionIterator iter (positions); /* Special notation for last KEY position, i.e. '$'. */
c = static_cast<unsigned char>(_allchars[_allchars_length - 1]);
for (int i; (i = iter.next ()) != PositionIterator::EOS; ) else if (i < _allchars_length)
{ {
unsigned int c; /* Within range of KEY length, so we'll keep it. */
if (i == Positions::LASTCHAR) c = static_cast<unsigned char>(_allchars[i]);
/* Special notation for last KEY position, i.e. '$'. */ if (alpha_inc)
c = static_cast<unsigned char>(_allchars[_allchars_length - 1]); c += alpha_inc[i];
else if (i < _allchars_length)
{
/* Within range of KEY length, so we'll keep it. */
c = static_cast<unsigned char>(_allchars[i]);
if (alpha_inc)
c += alpha_inc[i];
}
else
/* Out of range of KEY length, so we'll just skip it. */
continue;
if (alpha_unify)
c = alpha_unify[c];
*ptr = c;
ptr++;
} }
else
/* Out of range of KEY length, the iterator should not have
produced this. */
abort ();
if (alpha_unify)
c = alpha_unify[c];
*ptr = c;
ptr++;
} }
_selchars = key_set; _selchars = key_set;
@@ -115,16 +113,16 @@ KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, c
} }
void void
KeywordExt::init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify) KeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify)
{ {
init_selchars_low (use_all_chars, positions, alpha_unify, NULL); init_selchars_low (positions, alpha_unify, NULL);
} }
void void
KeywordExt::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) KeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
{ {
unsigned int *selchars = unsigned int *selchars =
init_selchars_low (use_all_chars, positions, alpha_unify, alpha_inc); init_selchars_low (positions, alpha_unify, alpha_inc);
/* Sort the selchars elements alphabetically. */ /* Sort the selchars elements alphabetically. */
sort_char_set (selchars, _selchars_length); sort_char_set (selchars, _selchars_length);

View File

@@ -68,9 +68,9 @@ struct KeywordExt : public Keyword
/* Methods depending on the keyposition list. */ /* Methods depending on the keyposition list. */
/* Initializes selchars and selchars_length, without reordering. */ /* Initializes selchars and selchars_length, without reordering. */
void init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify); void init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify);
/* Initializes selchars and selchars_length, with reordering. */ /* Initializes selchars and selchars_length, with reordering. */
void init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc); void init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc);
/* Deletes selchars. */ /* Deletes selchars. */
void delete_selchars (); void delete_selchars ();
@@ -81,7 +81,7 @@ struct KeywordExt : public Keyword
int _final_index; int _final_index;
private: private:
unsigned int * init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc); unsigned int * init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc);
}; };
/* An abstract factory for creating Keyword instances. /* An abstract factory for creating Keyword instances.

View File

@@ -514,14 +514,14 @@ Options::~Options ()
_function_name, _hash_name, _wordlist_name, _slot_name, _function_name, _hash_name, _wordlist_name, _slot_name,
_initializer_suffix, _asso_iterations, _jump, _size_multiple, _initializer_suffix, _asso_iterations, _jump, _size_multiple,
_initial_asso_value, _delimiters, _total_switches); _initial_asso_value, _delimiters, _total_switches);
if (_option_word & ALLCHARS) if (_key_positions.is_useall())
fprintf (stderr, "all characters are used in the hash function\n"); fprintf (stderr, "all characters are used in the hash function\n");
else else
{ {
fprintf (stderr, "maximum keysig size = %d\nkey positions are: \n", fprintf (stderr, "maximum keysig size = %d\nkey positions are: \n",
_key_positions.get_size()); _key_positions.get_size());
PositionIterator iter (_key_positions); PositionIterator iter = _key_positions.iterator();
for (int pos; (pos = iter.next()) != PositionIterator::EOS; ) for (int pos; (pos = iter.next()) != PositionIterator::EOS; )
if (pos == Positions::LASTCHAR) if (pos == Positions::LASTCHAR)
fprintf (stderr, "$\n"); fprintf (stderr, "$\n");
@@ -773,9 +773,10 @@ Options::parse_options (int argc, char *argv[])
PositionStringParser sparser (/*getopt*/optarg, 1, Positions::MAX_KEY_POS, Positions::LASTCHAR, BAD_VALUE, EOS); PositionStringParser sparser (/*getopt*/optarg, 1, Positions::MAX_KEY_POS, Positions::LASTCHAR, BAD_VALUE, EOS);
if (/*getopt*/optarg [0] == '*') /* Use all the characters for hashing!!!! */ if (/*getopt*/optarg [0] == '*') /* Use all the characters for hashing!!!! */
_option_word |= ALLCHARS; _key_positions.set_useall(true);
else else
{ {
_key_positions.set_useall(false);
int *key_positions = _key_positions.pointer(); int *key_positions = _key_positions.pointer();
int *key_pos; int *key_pos;

View File

@@ -96,22 +96,19 @@ enum Option_Type
/* Use the given key positions. */ /* Use the given key positions. */
POSITIONS = 1 << 16, POSITIONS = 1 << 16,
/* Use all characters in hash function. */
ALLCHARS = 1 << 17,
/* Handle duplicate hash values for keywords. */ /* Handle duplicate hash values for keywords. */
DUP = 1 << 18, DUP = 1 << 17,
/* Don't include keyword length in hash computations. */ /* Don't include keyword length in hash computations. */
NOLENGTH = 1 << 19, NOLENGTH = 1 << 18,
/* Randomly initialize the associated values table. */ /* Randomly initialize the associated values table. */
RANDOM = 1 << 20, RANDOM = 1 << 19,
/* --- Informative output --- */ /* --- Informative output --- */
/* Enable debugging (prints diagnostics to stderr). */ /* Enable debugging (prints diagnostics to stderr). */
DEBUG = 1 << 21 DEBUG = 1 << 20
}; };
/* Class manager for gperf program Options. */ /* Class manager for gperf program Options. */
@@ -197,8 +194,7 @@ public:
/* Sets the delimiters string, if not already set. */ /* Sets the delimiters string, if not already set. */
void set_delimiters (const char *delimiters); void set_delimiters (const char *delimiters);
/* Returns key positions. /* Returns key positions. */
Only to be used if !options[ALLCHARS]. */
const Positions& get_key_positions () const; const Positions& get_key_positions () const;
private: private:

View File

@@ -135,8 +135,7 @@ Options::get_delimiters () const
return _delimiters; return _delimiters;
} }
/* Returns key positions. /* Returns key positions. */
Only to be used if !options[ALLCHARS]. */
INLINE const Positions& INLINE const Positions&
Options::get_key_positions () const Options::get_key_positions () const
{ {

View File

@@ -606,7 +606,7 @@ Output::output_hash_function () const
printf ("{\n"); printf ("{\n");
/* First the asso_values array. */ /* First the asso_values array. */
if (option[ALLCHARS] || _key_positions.get_size() > 0) if (_key_positions.get_size() > 0)
{ {
printf (" static %s%s asso_values[] =\n" printf (" static %s%s asso_values[] =\n"
" {", " {",
@@ -633,31 +633,7 @@ Output::output_hash_function () const
" };\n"); " };\n");
} }
if (option[ALLCHARS]) if (_key_positions.get_size() == 0)
{
/* User wants *all* characters considered in hash. */
printf (" register int hval = %s;\n\n"
" switch (%s)\n"
" {\n"
" default:\n",
option[NOLENGTH] ? "0" : "len",
option[NOLENGTH] ? "len" : "hval");
for (int i = _max_key_len; i > 0; i--)
{
printf (" case %d:\n"
" hval += asso_values[%sstr[%d]",
i, char_to_index, i - 1);
if (_alpha_inc[i - 1])
printf ("+%u", _alpha_inc[i - 1]);
printf ("];\n");
}
printf (" break;\n"
" }\n"
" return hval;\n");
}
else if (_key_positions.get_size() == 0)
{ {
/* Trivial case: No key positions at all. */ /* Trivial case: No key positions at all. */
printf (" return %s;\n", printf (" return %s;\n",
@@ -668,7 +644,7 @@ Output::output_hash_function () const
/* Iterate through the key positions. Remember that Positions::sort() /* Iterate through the key positions. Remember that Positions::sort()
has sorted them in decreasing order, with Positions::LASTCHAR coming has sorted them in decreasing order, with Positions::LASTCHAR coming
last. */ last. */
PositionIterator iter (_key_positions); PositionIterator iter = _key_positions.iterator(_max_key_len);
int key_pos; int key_pos;
/* Get the highest key position. */ /* Get the highest key position. */

View File

@@ -119,7 +119,7 @@ private:
int const _max_key_len; int const _max_key_len;
/* Minimum length of the shortest keyword. */ /* Minimum length of the shortest keyword. */
int const _min_key_len; int const _min_key_len;
/* Key positions. Only to be used if !options[ALLCHARS]. */ /* Key positions. */
Positions const _key_positions; Positions const _key_positions;
/* Adjustments to add to bytes at specific key positions. */ /* Adjustments to add to bytes at specific key positions. */
const unsigned int * const _alpha_inc; const unsigned int * const _alpha_inc;

View File

@@ -50,6 +50,8 @@ Positions::contains (int pos) const
void void
Positions::add (int pos) Positions::add (int pos)
{ {
set_useall (false);
unsigned int count = _size; unsigned int count = _size;
if (count == MAX_SIZE) if (count == MAX_SIZE)
@@ -78,6 +80,8 @@ Positions::add (int pos)
void void
Positions::remove (int pos) Positions::remove (int pos)
{ {
set_useall (false);
unsigned int count = _size; unsigned int count = _size;
if (count > 0) if (count > 0)
{ {
@@ -120,41 +124,46 @@ Positions::remove (int pos)
void void
Positions::print () const Positions::print () const
{ {
bool first = true; if (_useall)
bool seen_LASTCHAR = false; printf ("*");
unsigned int count = _size; else
const int *p = _positions + _size - 1;
for (; count > 0; p--)
{ {
count--; bool first = true;
if (*p == LASTCHAR) bool seen_LASTCHAR = false;
seen_LASTCHAR = true; unsigned int count = _size;
else const int *p = _positions + _size - 1;
for (; count > 0; p--)
{
count--;
if (*p == LASTCHAR)
seen_LASTCHAR = true;
else
{
if (!first)
printf (",");
printf ("%d", *p + 1);
if (count > 0 && p[-1] == *p + 1)
{
printf ("-");
do
{
p--;
count--;
}
while (count > 0 && p[-1] == *p + 1);
printf ("%d", *p + 1);
}
first = false;
}
}
if (seen_LASTCHAR)
{ {
if (!first) if (!first)
printf (","); printf (",");
printf ("%d", *p + 1); printf ("$");
if (count > 0 && p[-1] == *p + 1)
{
printf ("-");
do
{
p--;
count--;
}
while (count > 0 && p[-1] == *p + 1);
printf ("%d", *p + 1);
}
first = false;
} }
} }
if (seen_LASTCHAR)
{
if (!first)
printf (",");
printf ("$");
}
} }
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */

View File

@@ -57,10 +57,12 @@ public:
Positions& operator= (const Positions& src); Positions& operator= (const Positions& src);
/* Accessors. */ /* Accessors. */
bool is_useall () const;
int operator[] (unsigned int index) const; int operator[] (unsigned int index) const;
unsigned int get_size () const; unsigned int get_size () const;
/* Write access. */ /* Write access. */
void set_useall (bool useall);
int * pointer (); int * pointer ();
void set_size (unsigned int size); void set_size (unsigned int size);
@@ -68,6 +70,17 @@ public:
Returns true if there are no duplicates, false otherwise. */ Returns true if there are no duplicates, false otherwise. */
bool sort (); bool sort ();
/* Creates an iterator, returning the positions in descending order. */
PositionIterator iterator () const;
/* Creates an iterator, returning the positions in descending order,
that apply to strings of length <= maxlen. */
PositionIterator iterator (int maxlen) const;
/* Creates an iterator, returning the positions in ascending order. */
PositionReverseIterator reviterator () const;
/* Creates an iterator, returning the positions in ascending order,
that apply to strings of length <= maxlen. */
PositionReverseIterator reviterator (int maxlen) const;
/* Set operations. Assumes the array is in reverse order. */ /* Set operations. Assumes the array is in reverse order. */
bool contains (int pos) const; bool contains (int pos) const;
void add (int pos); void add (int pos);
@@ -77,6 +90,8 @@ public:
void print () const; void print () const;
private: private:
/* The special case denoted by '*'. */
bool _useall;
/* Number of positions. */ /* Number of positions. */
unsigned int _size; unsigned int _size;
/* Array of positions. 0 for the first char, 1 for the second char etc., /* Array of positions. 0 for the first char, 1 for the second char etc.,
@@ -88,9 +103,10 @@ private:
class PositionIterator class PositionIterator
{ {
friend class Positions;
public: public:
/* Initializes an iterator through POSITIONS. */ /* Copy constructor. */
PositionIterator (Positions const& positions); PositionIterator (const PositionIterator& src);
/* End of iteration marker. */ /* End of iteration marker. */
enum { EOS = -2 }; enum { EOS = -2 };
@@ -98,7 +114,16 @@ public:
/* Retrieves the next position, or EOS past the end. */ /* Retrieves the next position, or EOS past the end. */
int next (); int next ();
/* Returns the number of remaining positions, i.e. how often next() will
return a value != EOS. */
unsigned int remaining () const;
private: private:
/* Initializes an iterator through POSITIONS. */
PositionIterator (Positions const& positions);
/* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */
PositionIterator (Positions const& positions, int maxlen);
const Positions& _set; const Positions& _set;
unsigned int _index; unsigned int _index;
}; };
@@ -108,9 +133,10 @@ private:
class PositionReverseIterator class PositionReverseIterator
{ {
friend class Positions;
public: public:
/* Initializes an iterator through POSITIONS. */ /* Copy constructor. */
PositionReverseIterator (Positions const& positions); PositionReverseIterator (const PositionReverseIterator& src);
/* End of iteration marker. */ /* End of iteration marker. */
enum { EOS = -2 }; enum { EOS = -2 };
@@ -118,9 +144,19 @@ public:
/* Retrieves the next position, or EOS past the end. */ /* Retrieves the next position, or EOS past the end. */
int next (); int next ();
/* Returns the number of remaining positions, i.e. how often next() will
return a value != EOS. */
unsigned int remaining () const;
private: private:
/* Initializes an iterator through POSITIONS. */
PositionReverseIterator (Positions const& positions);
/* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */
PositionReverseIterator (Positions const& positions, int maxlen);
const Positions& _set; const Positions& _set;
unsigned int _index; unsigned int _index;
unsigned int _minindex;
}; };
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__

View File

@@ -30,20 +30,23 @@
INLINE INLINE
Positions::Positions () Positions::Positions ()
: _size (0) : _useall (false),
_size (0)
{ {
} }
INLINE INLINE
Positions::Positions (int pos1) Positions::Positions (int pos1)
: _size (1) : _useall (false),
_size (1)
{ {
_positions[0] = pos1; _positions[0] = pos1;
} }
INLINE INLINE
Positions::Positions (int pos1, int pos2) Positions::Positions (int pos1, int pos2)
: _size (2) : _useall (false),
_size (2)
{ {
_positions[0] = pos1; _positions[0] = pos1;
_positions[1] = pos2; _positions[1] = pos2;
@@ -53,7 +56,8 @@ Positions::Positions (int pos1, int pos2)
INLINE INLINE
Positions::Positions (const Positions& src) Positions::Positions (const Positions& src)
: _size (src._size) : _useall (src._useall),
_size (src._size)
{ {
memcpy (_positions, src._positions, _size * sizeof (_positions[0])); memcpy (_positions, src._positions, _size * sizeof (_positions[0]));
} }
@@ -63,6 +67,7 @@ Positions::Positions (const Positions& src)
INLINE Positions& INLINE Positions&
Positions::operator= (const Positions& src) Positions::operator= (const Positions& src)
{ {
_useall = src._useall;
_size = src._size; _size = src._size;
memcpy (_positions, src._positions, _size * sizeof (_positions[0])); memcpy (_positions, src._positions, _size * sizeof (_positions[0]));
return *this; return *this;
@@ -70,6 +75,12 @@ Positions::operator= (const Positions& src)
/* Accessors. */ /* Accessors. */
/* Returns the _useall flag, i.e. whether this object stands for the
   special case denoted by '*'.  */
INLINE bool
Positions::is_useall () const
{
  return this->_useall;
}
INLINE int INLINE int
Positions::operator[] (unsigned int index) const Positions::operator[] (unsigned int index) const
{ {
@@ -84,6 +95,20 @@ Positions::get_size () const
/* Write access. */ /* Write access. */
/* Sets or clears the _useall flag.
   When the flag is set, the position array is filled with every position
   0, 1, ..., MAX_KEY_POS-1, stored in descending order, and _size becomes
   MAX_KEY_POS.  When the flag is cleared, _size and the stored positions
   are left untouched.  */
INLINE void
Positions::set_useall (bool useall)
{
  _useall = useall;
  if (!useall)
    return;

  _size = MAX_KEY_POS;
  /* Slot 0 receives the highest position, the last slot receives 0.  */
  for (int slot = 0; slot < MAX_KEY_POS; slot++)
    _positions[slot] = MAX_KEY_POS - 1 - slot;
}
INLINE int * INLINE int *
Positions::pointer () Positions::pointer ()
{ {
@@ -101,6 +126,9 @@ Positions::set_size (unsigned int size)
INLINE bool INLINE bool
Positions::sort () Positions::sort ()
{ {
if (_useall)
return true;
/* Bubble sort. */ /* Bubble sort. */
bool duplicate_free = true; bool duplicate_free = true;
int *base = _positions; int *base = _positions;
@@ -121,6 +149,36 @@ Positions::sort ()
return duplicate_free; return duplicate_free;
} }
/* Returns a fresh PositionIterator over this set.  The iterator walks the
   position array from its first element onwards (descending order of
   positions).  */
INLINE PositionIterator
Positions::iterator () const
{
  PositionIterator descending (*this);
  return descending;
}
/* Returns a fresh PositionIterator over this set that ignores positions
   >= MAXLEN, i.e. yields only the positions that apply to strings of
   length <= MAXLEN, in descending order.  */
INLINE PositionIterator
Positions::iterator (int maxlen) const
{
  PositionIterator descending (*this, maxlen);
  return descending;
}
/* Returns a fresh PositionReverseIterator over this set, which yields the
   positions in ascending order.  */
INLINE PositionReverseIterator
Positions::reviterator () const
{
  PositionReverseIterator ascending (*this);
  return ascending;
}
/* Returns a fresh PositionReverseIterator over this set that ignores
   positions >= MAXLEN, i.e. yields only the positions that apply to
   strings of length <= MAXLEN, in ascending order.  */
INLINE PositionReverseIterator
Positions::reviterator (int maxlen) const
{
  PositionReverseIterator ascending (*this, maxlen);
  return ascending;
}
/* ------------------------- Class PositionIterator ------------------------ */ /* ------------------------- Class PositionIterator ------------------------ */
/* Initializes an iterator through POSITIONS. */ /* Initializes an iterator through POSITIONS. */
@@ -131,6 +189,24 @@ PositionIterator::PositionIterator (Positions const& positions)
{ {
} }
/* Initializes an iterator through POSITIONS that skips every position
   >= MAXLEN.  The starting index is chosen so that the first call to
   next() already returns a position below MAXLEN (or whatever follows
   the skipped entries in the array).  */
INLINE
PositionIterator::PositionIterator (Positions const& positions, int maxlen)
  : _set (positions)
{
  if (positions._useall)
    {
      /* In the '*' case slot i holds position MAX_KEY_POS-1-i, so the
         start index can be computed directly instead of searched for.  */
      if (maxlen <= Positions::MAX_KEY_POS)
        _index = Positions::MAX_KEY_POS - maxlen;
      else
        _index = 0;
    }
  else
    {
      /* Step over the leading entries that are >= maxlen.  */
      unsigned int skipped = 0;
      while (skipped < positions._size
             && positions._positions[skipped] >= maxlen)
        skipped++;
      _index = skipped;
    }
}
/* Retrieves the next position, or EOS past the end. */ /* Retrieves the next position, or EOS past the end. */
INLINE int INLINE int
PositionIterator::next () PositionIterator::next ()
@@ -138,19 +214,72 @@ PositionIterator::next ()
return (_index < _set._size ? _set._positions[_index++] : EOS); return (_index < _set._size ? _set._positions[_index++] : EOS);
} }
/* Returns how many positions are still to come, i.e. the number of times
   next() will return something other than EOS: the size of the set minus
   the current index.  */
INLINE unsigned int
PositionIterator::remaining () const
{
  const unsigned int total = _set._size;
  return total - _index;
}
/* Copy constructor.  The copy refers to the same Positions object as SRC
   and starts at the same index.  */
INLINE
PositionIterator::PositionIterator (const PositionIterator& src)
  : _set (src._set), _index (src._index)
{
}
/* --------------------- Class PositionReverseIterator --------------------- */ /* --------------------- Class PositionReverseIterator --------------------- */
/* Initializes an iterator through POSITIONS. */ /* Initializes an iterator through POSITIONS. */
INLINE INLINE
PositionReverseIterator::PositionReverseIterator (Positions const& positions) PositionReverseIterator::PositionReverseIterator (Positions const& positions)
: _set (positions),
_index (_set._size),
_minindex (0)
{
}
/* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */
INLINE
PositionReverseIterator::PositionReverseIterator (Positions const& positions, int maxlen)
: _set (positions), : _set (positions),
_index (_set._size) _index (_set._size)
{ {
if (positions._useall)
_minindex = (maxlen <= Positions::MAX_KEY_POS ? Positions::MAX_KEY_POS - maxlen : 0);
else
{
unsigned int index;
for (index = 0;
index < positions._size && positions._positions[index] >= maxlen;
index++)
;
_minindex = index;
}
} }
/* Retrieves the next position, or EOS past the end. */ /* Retrieves the next position, or EOS past the end. */
INLINE int INLINE int
PositionReverseIterator::next () PositionReverseIterator::next ()
{ {
return (_index > 0 ? _set._positions[--_index] : EOS); return (_index > _minindex ? _set._positions[--_index] : EOS);
}
/* Returns how many positions are still to come, i.e. the number of times
   next() will return something other than EOS: the distance between the
   current index and the lower bound _minindex.  */
INLINE unsigned int
PositionReverseIterator::remaining () const
{
  const unsigned int lower_bound = _minindex;
  return _index - lower_bound;
}
/* Copy constructor. */
INLINE
PositionReverseIterator::PositionReverseIterator (const PositionReverseIterator& src)
: _set (src._set),
_index (src._index),
_minindex (src._minindex)
{
} }

View File

@@ -175,10 +175,10 @@ Search::compute_alpha_unify () const
/* Initializes each keyword's _selchars array. */ /* Initializes each keyword's _selchars array. */
void void
Search::init_selchars_tuple (bool use_all_chars, const Positions& positions) const Search::init_selchars_tuple (const Positions& positions) const
{ {
for (KeywordExt_List *temp = _head; temp; temp = temp->rest()) for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
temp->first()->init_selchars_tuple(use_all_chars, positions, _alpha_unify); temp->first()->init_selchars_tuple(positions, _alpha_unify);
} }
/* Deletes each keyword's _selchars array. */ /* Deletes each keyword's _selchars array. */
@@ -199,7 +199,7 @@ Search::count_duplicates_tuple (const Positions& positions) const
/* Run through the keyword list and count the duplicates incrementally. /* Run through the keyword list and count the duplicates incrementally.
The result does not depend on the order of the keyword list, thanks to The result does not depend on the order of the keyword list, thanks to
the formula above. */ the formula above. */
init_selchars_tuple (option[ALLCHARS], positions); init_selchars_tuple (positions);
unsigned int count = 0; unsigned int count = 0;
{ {
@@ -411,7 +411,7 @@ Search::find_positions ()
{ {
/* Print the result. */ /* Print the result. */
fprintf (stderr, "\nComputed positions: "); fprintf (stderr, "\nComputed positions: ");
PositionReverseIterator iter (_key_positions); PositionReverseIterator iter = _key_positions.reviterator();
bool seen_lastchar = false; bool seen_lastchar = false;
bool first = true; bool first = true;
for (int i; (i = iter.next ()) != PositionReverseIterator::EOS; ) for (int i; (i = iter.next ()) != PositionReverseIterator::EOS; )
@@ -482,49 +482,29 @@ Search::compute_alpha_unify (const Positions& positions, const unsigned int *alp
{ {
KeywordExt *keyword = temp->first(); KeywordExt *keyword = temp->first();
if (option[ALLCHARS]) /* Iterate through the selected character positions. */
/* Iterate through all character positions. */ PositionIterator iter = positions.iterator(keyword->_allchars_length);
for (int i = 0; i < keyword->_allchars_length; i++)
{
unsigned int c = static_cast<unsigned char>(keyword->_allchars[i]);
if (c >= 'A' && c <= 'Z')
c += 'a' - 'A';
if (c >= 'a' && c <= 'z')
{
c += alpha_inc[i];
/* Unify c with c - ('a'-'A'). */
unsigned int d = alpha_unify[c];
unsigned int b = c - ('a'-'A');
for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A'))
alpha_unify[a] = d;
}
}
else
{
/* Iterate through the selected character positions. */
PositionIterator iter (positions);
for (int i; (i = iter.next ()) != PositionIterator::EOS; ) for (int i; (i = iter.next ()) != PositionIterator::EOS; )
{
unsigned int c;
if (i == Positions::LASTCHAR)
c = static_cast<unsigned char>(keyword->_allchars[keyword->_allchars_length - 1]);
else if (i < keyword->_allchars_length)
c = static_cast<unsigned char>(keyword->_allchars[i]);
else
abort ();
if (c >= 'A' && c <= 'Z')
c += 'a' - 'A';
if (c >= 'a' && c <= 'z')
{ {
unsigned int c; if (i != Positions::LASTCHAR)
if (i == Positions::LASTCHAR) c += alpha_inc[i];
c = static_cast<unsigned char>(keyword->_allchars[keyword->_allchars_length - 1]); /* Unify c with c - ('a'-'A'). */
else if (i < keyword->_allchars_length) unsigned int d = alpha_unify[c];
c = static_cast<unsigned char>(keyword->_allchars[i]); unsigned int b = c - ('a'-'A');
else for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A'))
continue; alpha_unify[a] = d;
if (c >= 'A' && c <= 'Z')
c += 'a' - 'A';
if (c >= 'a' && c <= 'z')
{
if (i != Positions::LASTCHAR)
c += alpha_inc[i];
/* Unify c with c - ('a'-'A'). */
unsigned int d = alpha_unify[c];
unsigned int b = c - ('a'-'A');
for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A'))
alpha_unify[a] = d;
}
} }
} }
} }
@@ -537,10 +517,10 @@ Search::compute_alpha_unify (const Positions& positions, const unsigned int *alp
/* Initializes each keyword's _selchars array. */ /* Initializes each keyword's _selchars array. */
void void
Search::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const Search::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const
{ {
for (KeywordExt_List *temp = _head; temp; temp = temp->rest()) for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
temp->first()->init_selchars_multiset(use_all_chars, positions, alpha_unify, alpha_inc); temp->first()->init_selchars_multiset(positions, alpha_unify, alpha_inc);
} }
/* Count the duplicate keywords that occur with the given set of positions /* Count the duplicate keywords that occur with the given set of positions
@@ -554,7 +534,7 @@ Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
/* Run through the keyword list and count the duplicates incrementally. /* Run through the keyword list and count the duplicates incrementally.
The result does not depend on the order of the keyword list, thanks to The result does not depend on the order of the keyword list, thanks to
the formula above. */ the formula above. */
init_selchars_multiset (option[ALLCHARS], _key_positions, init_selchars_multiset (_key_positions,
compute_alpha_unify (_key_positions, alpha_inc), compute_alpha_unify (_key_positions, alpha_inc),
alpha_inc); alpha_inc);
@@ -596,35 +576,34 @@ Search::find_alpha_inc ()
{ {
/* Look which _alpha_inc[i] we are free to increment. */ /* Look which _alpha_inc[i] we are free to increment. */
unsigned int nindices; unsigned int nindices;
if (option[ALLCHARS]) {
nindices = _max_key_len; nindices = 0;
else PositionIterator iter = _key_positions.iterator(_max_key_len);
{ for (;;)
/* Ignore Positions::LASTCHAR. Remember that since Positions are {
sorted in decreasing order, Positions::LASTCHAR comes last. */ int key_pos = iter.next ();
nindices = (_key_positions.get_size() == 0 if (key_pos == PositionIterator::EOS)
|| _key_positions[_key_positions.get_size() - 1] break;
!= Positions::LASTCHAR if (key_pos != Positions::LASTCHAR)
? _key_positions.get_size() nindices++;
: _key_positions.get_size() - 1); }
} }
unsigned int indices[nindices]; unsigned int indices[nindices];
if (option[ALLCHARS]) {
for (unsigned int j = 0; j < nindices; j++) unsigned int j = 0;
indices[j] = j; PositionIterator iter = _key_positions.iterator(_max_key_len);
else for (;;)
{ {
PositionIterator iter (_key_positions); int key_pos = iter.next ();
for (unsigned int j = 0; j < nindices; j++) if (key_pos == PositionIterator::EOS)
{ break;
int key_pos = iter.next (); if (key_pos != Positions::LASTCHAR)
if (key_pos == PositionIterator::EOS indices[j++] = key_pos;
|| key_pos == Positions::LASTCHAR) }
abort (); if (!(j == nindices))
indices[j] = key_pos; abort ();
} }
}
/* Perform several rounds of searching for a good alpha increment. /* Perform several rounds of searching for a good alpha increment.
Each round reduces the number of artificial collisions by adding Each round reduces the number of artificial collisions by adding
@@ -670,32 +649,16 @@ Search::find_alpha_inc ()
{ {
/* Print the result. */ /* Print the result. */
fprintf (stderr, "\nComputed alpha increments: "); fprintf (stderr, "\nComputed alpha increments: ");
if (option[ALLCHARS]) bool first = true;
{ for (unsigned int j = nindices; j-- > 0; )
bool first = true; if (current[indices[j]] != 0)
for (unsigned int j = 0; j < nindices; j++) {
if (current[indices[j]] != 0) if (!first)
{ fprintf (stderr, ", ");
if (!first) fprintf (stderr, "%u:+%u",
fprintf (stderr, ", "); indices[j] + 1, current[indices[j]]);
fprintf (stderr, "%u:+%u", first = false;
indices[j] + 1, current[indices[j]]); }
first = false;
}
}
else
{
bool first = true;
for (unsigned int j = nindices; j-- > 0; )
if (current[indices[j]] != 0)
{
if (!first)
fprintf (stderr, ", ");
fprintf (stderr, "%u:+%u",
indices[j] + 1, current[indices[j]]);
first = false;
}
}
fprintf (stderr, "\n"); fprintf (stderr, "\n");
} }
} }
@@ -713,8 +676,7 @@ Search::prepare ()
KeywordExt_List *temp; KeywordExt_List *temp;
/* Initialize each keyword's _selchars array. */ /* Initialize each keyword's _selchars array. */
init_selchars_multiset(option[ALLCHARS], _key_positions, init_selchars_multiset(_key_positions, _alpha_unify, _alpha_inc);
_alpha_unify, _alpha_inc);
/* Check for duplicates, i.e. keywords with the same _selchars array /* Check for duplicates, i.e. keywords with the same _selchars array
(and - if !option[NOLENGTH] - also the same length). (and - if !option[NOLENGTH] - also the same length).
@@ -831,7 +793,7 @@ Search::max_key_length () const
int int
Search::get_max_keysig_size () const Search::get_max_keysig_size () const
{ {
return option[ALLCHARS] ? _max_key_len : _key_positions.get_size(); return _key_positions.is_useall() ? _max_key_len : _key_positions.get_size();
} }
/* ---------------------- Finding good asso_values[] ----------------------- */ /* ---------------------- Finding good asso_values[] ----------------------- */

View File

@@ -50,7 +50,7 @@ private:
unsigned int * compute_alpha_unify () const; unsigned int * compute_alpha_unify () const;
/* Initializes each keyword's _selchars array. */ /* Initializes each keyword's _selchars array. */
void init_selchars_tuple (bool use_all_chars, const Positions& positions) const; void init_selchars_tuple (const Positions& positions) const;
/* Deletes each keyword's _selchars array. */ /* Deletes each keyword's _selchars array. */
void delete_selchars () const; void delete_selchars () const;
@@ -67,7 +67,7 @@ private:
unsigned int * compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const; unsigned int * compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const;
/* Initializes each keyword's _selchars array. */ /* Initializes each keyword's _selchars array. */
void init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const; void init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const;
/* Count the duplicate keywords that occur with the given set of positions /* Count the duplicate keywords that occur with the given set of positions
and a given alpha_inc[] array. */ and a given alpha_inc[] array. */

View File

@@ -55,7 +55,6 @@ hash (str, len)
switch (hval) switch (hval)
{ {
default: default:
case 30:
hval += asso_values[(unsigned char)str[29]]; hval += asso_values[(unsigned char)str[29]];
case 29: case 29:
hval += asso_values[(unsigned char)str[28]]; hval += asso_values[(unsigned char)str[28]];