1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Continuing rework.

This commit is contained in:
Bruno Haible
2003-01-06 11:43:41 +00:00
parent 72a3884ff9
commit b91e4511c0
3 changed files with 59 additions and 37 deletions

View File

@@ -1,5 +1,13 @@
2002-11-03 Bruno Haible <bruno@clisp.org> 2002-11-03 Bruno Haible <bruno@clisp.org>
* src/search.h (Search::compute_hash): Renamed from Search::hash.
(Search::compute_disjoint_union): Remove declaration.
(Search::sort_by_occurrence): Renamed from Search::sort_set.
* src/search.cc (Search::compute_hash): Renamed from Search::hash.
(compute_disjoint_union): Renamed from Search::compute_disjoint_union.
(Search::sort_by_occurrence): Renamed from Search::sort_set.
(Search::change): Simplify loop.
* src/search.h (Search::clear_determined): New declaration. * src/search.h (Search::clear_determined): New declaration.
* src/search.cc (Search::clear_determined): New method. * src/search.cc (Search::clear_determined): New method.
(Search::already_determined): Optimize. (Search::already_determined): Optimize.

View File

@@ -356,7 +356,7 @@ Search::reorder ()
} }
} }
/* Returns the length of entire key list. */ /* Returns the length of the keyword list. */
int int
Search::keyword_list_length () Search::keyword_list_length ()
@@ -364,7 +364,7 @@ Search::keyword_list_length ()
return _list_len; return _list_len;
} }
/* Returns length of longest key read. */ /* Returns the maximum length of keywords. */
int int
Search::max_key_length () Search::max_key_length ()
@@ -372,7 +372,7 @@ Search::max_key_length ()
return _max_key_len; return _max_key_len;
} }
/* Returns number of key positions. */ /* Returns the number of key positions. */
int int
Search::get_max_keysig_size () Search::get_max_keysig_size ()
@@ -380,34 +380,44 @@ Search::get_max_keysig_size ()
return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size (); return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size ();
} }
/* Generate a key set's hash value. */ /* Computes a keyword's hash value, relative to the current _asso_values[],
and stores it in keyword->_hash_value.
This is called very frequently, and needs to be fast! */
inline int inline int
Search::hash (KeywordExt *key_node) Search::compute_hash (KeywordExt *keyword)
{ {
int sum = option[NOLENGTH] ? 0 : key_node->_allchars_length; int sum = option[NOLENGTH] ? 0 : keyword->_allchars_length;
const unsigned char *p = key_node->_selchars; const unsigned char *p = keyword->_selchars;
int i = key_node->_selchars_length; int i = keyword->_selchars_length;
for (; i > 0; p++, i--) for (; i > 0; p++, i--)
sum += _asso_values[*p]; sum += _asso_values[*p];
return key_node->_hash_value = sum; return keyword->_hash_value = sum;
} }
/* Merge two disjoint hash key multisets to form the ordered disjoint union of the sets. /* Computes the disjoint union of two multisets of characters, i.e.
(In a multiset, an element can occur multiple times.) the set of characters that are contained with a different multiplicity
Precondition: both set_1 and set_2 must be ordered. Returns the length in set_1 and set_2. This includes those characters which are contained
of the combined set. */ in one of the sets but not both.
Both sets set_1[0..size_1-1] and set_2[0..size_2-1] are given ordered.
The result, an ordered set (not multiset!) is stored in set_3[0...].
Returns the size of the resulting set. */
inline int inline int
Search::compute_disjoint_union (const unsigned char *set_1, int size_1, const unsigned char *set_2, int size_2, unsigned char *set_3) compute_disjoint_union (const unsigned char *set_1, int size_1,
const unsigned char *set_2, int size_2,
unsigned char *set_3)
{ {
unsigned char *base = set_3; unsigned char *base = set_3;
while (size_1 > 0 && size_2 > 0) while (size_1 > 0 && size_2 > 0)
if (*set_1 == *set_2) if (*set_1 == *set_2)
set_1++, size_1--, set_2++, size_2--; {
set_1++, size_1--;
set_2++, size_2--;
}
else else
{ {
unsigned char next; unsigned char next;
@@ -437,27 +447,23 @@ Search::compute_disjoint_union (const unsigned char *set_1, int size_1, const u
return set_3 - base; return set_3 - base;
} }
/* Sort the UNION_SET in increasing frequency of occurrence. /* Sorts the given set in increasing frequency of _occurrences[]. */
This speeds up later processing since we may assume the resulting
set (Set_3, in this case), is ordered. Uses insertion sort, since
the UNION_SET is typically short. */
inline void inline void
Search::sort_set (unsigned char *union_set, int len) Search::sort_by_occurrence (unsigned char *set, int len)
{ {
int i, j; /* Use insertion sort, since the set is typically short. */
for (int i = 1; i < len; i++)
for (i = 0, j = len - 1; i < j; i++)
{ {
int curr; int curr;
unsigned char tmp; unsigned char tmp;
for (curr = i + 1, tmp = union_set[curr]; for (curr = i, tmp = set[curr];
curr > 0 && _occurrences[tmp] < _occurrences[union_set[curr-1]]; curr > 0 && _occurrences[tmp] < _occurrences[set[curr-1]];
curr--) curr--)
union_set[curr] = union_set[curr - 1]; set[curr] = set[curr - 1];
union_set[curr] = tmp; set[curr] = tmp;
} }
} }
@@ -493,7 +499,7 @@ Search::affects_prev (unsigned char c, KeywordExt *curr)
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest()) for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
{ {
KeywordExt *keyword = ptr->first(); KeywordExt *keyword = ptr->first();
if (_collision_detector->set_bit (hash (keyword)) if (_collision_detector->set_bit (compute_hash (keyword))
&& ++collisions >= _fewest_collisions) && ++collisions >= _fewest_collisions)
break; break;
if (keyword == curr) if (keyword == curr)
@@ -533,7 +539,7 @@ Search::change (KeywordExt *prior, KeywordExt *curr)
fflush (stderr); fflush (stderr);
} }
union_set_length = compute_disjoint_union (prior->_selchars, prior->_selchars_length, curr->_selchars, curr->_selchars_length, union_set); union_set_length = compute_disjoint_union (prior->_selchars, prior->_selchars_length, curr->_selchars, curr->_selchars_length, union_set);
sort_set (union_set, union_set_length); sort_by_occurrence (union_set, union_set_length);
/* Try changing some values, if change doesn't alter other values continue normal action. */ /* Try changing some values, if change doesn't alter other values continue normal action. */
_fewest_collisions++; _fewest_collisions++;
@@ -555,13 +561,11 @@ Search::change (KeywordExt *prior, KeywordExt *curr)
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest()) for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
{ {
KeywordExt* keyword = ptr->first(); KeywordExt* keyword = ptr->first();
compute_hash (keyword);
if (keyword == curr) if (keyword == curr)
break; break;
hash (keyword);
} }
hash (curr);
if (option[DEBUG]) if (option[DEBUG])
{ {
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n", fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
@@ -628,7 +632,7 @@ Search::optimize ()
{ {
KeywordExt *currkw = curr->first(); KeywordExt *currkw = curr->first();
hash (currkw); compute_hash (currkw);
for (KeywordExt_List *ptr = _head; ptr != curr; ptr = ptr->rest()) for (KeywordExt_List *ptr = _head; ptr != curr; ptr = ptr->rest())
{ {
@@ -649,7 +653,7 @@ Search::optimize ()
for (curr = _head; curr; curr = curr->rest()) for (curr = _head; curr; curr = curr->rest())
{ {
unsigned int hashcode = hash (curr->first()); unsigned int hashcode = compute_hash (curr->first());
if (_collision_detector->set_bit (hashcode)) if (_collision_detector->set_bit (hashcode))
{ {
if (option[DUP]) /* Keep track of this number... */ if (option[DUP]) /* Keep track of this number... */

View File

@@ -53,12 +53,22 @@ private:
/* Reorders the keyword list so as to minimize search times. */ /* Reorders the keyword list so as to minimize search times. */
void reorder (); void reorder ();
/* Returns the length of the keyword list. */
int keyword_list_length (); int keyword_list_length ();
/* Returns the maximum length of keywords. */
int max_key_length (); int max_key_length ();
/* Returns the number of key positions. */
int get_max_keysig_size (); int get_max_keysig_size ();
int hash (KeywordExt *key_node);
static int compute_disjoint_union (const unsigned char *set_1, int size_1, const unsigned char *set_2, int size_2, unsigned char *set_3); /* Computes a keyword's hash value, relative to the current _asso_values[],
void sort_set (unsigned char *union_set, int len); and stores it in keyword->_hash_value. */
int compute_hash (KeywordExt *key_node);
/* Sorts the given set in increasing frequency of _occurrences[]. */
void sort_by_occurrence (unsigned char *set, int len);
bool affects_prev (unsigned char c, KeywordExt *curr); bool affects_prev (unsigned char c, KeywordExt *curr);
void change (KeywordExt *prior, KeywordExt *curr); void change (KeywordExt *prior, KeywordExt *curr);
void sort (); void sort ();