1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Optimized choice during collision resolution.

This commit is contained in:
Bruno Haible
2003-03-14 11:01:01 +00:00
parent 19c69d8e5a
commit 7a8b43182a
5 changed files with 136 additions and 13 deletions

View File

@@ -27,6 +27,7 @@
#include <stdlib.h> /* declares exit(), rand(), srand() */
#include <string.h> /* declares memset(), memcmp() */
#include <time.h> /* declares time() */
#include <math.h> /* declares exp() */
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */
#include "options.h"
#include "hash-table.h"
@@ -904,7 +905,7 @@ Search::compute_hash (KeywordExt *keyword) const
const unsigned int *p = keyword->_selchars;
int i = keyword->_selchars_length;
for (; i > 0; p++, i--)
sum += _asso_values[*p];
sum += _asso_values[*p];
return keyword->_hash_value = sum;
}
@@ -962,20 +963,96 @@ compute_disjoint_union (const unsigned int *set_1, int size_1,
/* Sorts the given set in increasing frequency of _occurrences[]. */
inline void
Search::sort_by_occurrence (unsigned int *set, int len) const
Search::sort_by_occurrence (unsigned int *set, unsigned int len) const
{
/* Use bubble sort, since the set is typically short. */
for (int i = 1; i < len; i++)
for (unsigned int i = 1; i < len; i++)
{
int curr;
unsigned int j;
unsigned int tmp;
for (curr = i, tmp = set[curr];
curr > 0 && _occurrences[tmp] < _occurrences[set[curr-1]];
curr--)
set[curr] = set[curr - 1];
for (j = i, tmp = set[j];
j > 0 && _occurrences[tmp] < _occurrences[set[j-1]];
j--)
set[j] = set[j - 1];
set[curr] = tmp;
set[j] = tmp;
}
}
/* Computes the frequency of occurrence of a character among the keywords
up to the given keyword. */
inline unsigned int
Search::compute_occurrence (unsigned int c, KeywordExt *curr) const
{
unsigned int occurrence = 0;
for (KeywordExt_List *temp = _head; ; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
int m = keyword->_selchars_length;
for (int i = 0; i < m; i++)
if (keyword->_selchars[i] == c)
{
occurrence++;
break;
}
if (keyword == curr)
break;
}
return occurrence;
}
/* Sorts the given set in increasing frequency of occurrences among the
   keywords up to the given keyword.
   SET is an array of LEN characters and is reordered in place.
   CURR is the last keyword, inclusive, of the list starting at _head that
   is taken into account.  The walk over the list ends only when CURR is
   reached.  A keyword counts at most once for each character of SET.
   Characters with equal frequency keep their relative order.  */
inline void
Search::sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const
{
  /* A set with fewer than two elements is already sorted.  Returning here
     also avoids a zero-length allocation below.  */
  if (len < 2)
    return;

  /* occurrences[j] is the number of keywords that contain set[j].
     It is allocated with new[] because a variable-length array
     'unsigned int occurrences[len]' is a compiler extension, not ISO C++.  */
  unsigned int *occurrences = new unsigned int[len];
  for (unsigned int j = 0; j < len; j++)
    occurrences[j] = 0;

  for (KeywordExt_List *temp = _head; ; temp = temp->rest())
    {
      KeywordExt *keyword = temp->first();
      int m = keyword->_selchars_length;
      for (unsigned int j = 0; j < len; j++)
        {
          unsigned int c = set[j];
          /* Stop at the first match so that a keyword is counted only once
             for this character.  */
          for (int i = 0; i < m; i++)
            if (keyword->_selchars[i] == c)
              {
                occurrences[j]++;
                break;
              }
        }
      if (keyword == curr)
        break;
    }

  /* Use a straight insertion sort, since the set is typically short.
     set[] and occurrences[] are permuted in lockstep so that
     occurrences[j] always describes set[j].  */
  for (unsigned int i = 1; i < len; i++)
    {
      unsigned int j;
      unsigned int set_tmp, occ_tmp;
      for (j = i, set_tmp = set[j], occ_tmp = occurrences[j];
           j > 0 && occ_tmp < occurrences[j-1];
           j--)
        {
          set[j] = set[j - 1];
          occurrences[j] = occurrences[j - 1];
        }
      set[j] = set_tmp;
      occurrences[j] = occ_tmp;
    }

  delete[] occurrences;
}
@@ -1152,12 +1229,26 @@ Search::find_asso_values ()
/* Sort by increasing occurrence: Try least-used characters c first.
The idea is that this reduces the number of freshly introduced
collisions. */
sort_by_occurrence (union_set, union_set_length);
if (option[OPT_CHOICE])
sort_by_occurrence (union_set, union_set_length, curr);
else
sort_by_occurrence (union_set, union_set_length);
for (union_index = 0; union_index < union_set_length; union_index++)
{
c = union_set[union_index];
if (option[DEBUG])
{
unsigned int n = sp - stack + 1;
unsigned int o = compute_occurrence (c, curr);
fprintf (stderr, "Expected number of iterations between %g and %g\n",
exp (static_cast<double>((2*n-o)*o)
/ static_cast<double>(2*_max_hash_value)),
exp (static_cast<double>((2*n-o)*o)
/ static_cast<double>(2*_asso_value_max)));
}
/* Try various other values for _asso_values[c]. A value is
successful if, with it, the recomputed hash values for the
keywords from _head->first() to curr - inclusive - give no