mirror of
https://git.savannah.gnu.org/git/gperf.git
synced 2025-12-02 13:09:22 +00:00
Optimize: Use a hash table in compute_partition.
This reduces the execution time of gperf on large inputs by ca. 30%.
* autogen.sh (GNULIB_MODULES): Add map-c++, hash-map.
* src/keyword.h: Include <stddef.h>.
(struct KeywordExt): Add fields _undetermined_chars, _undetermined_chars_length, _undetermined_chars_hashcode.
* src/search.cc: Include gl_map.hh, gl_hash_map.h.
(Search::prepare_asso_values): Initialize the _undetermined_chars field.
(struct EquivalenceClass): Remove the fields _undetermined_chars, _undetermined_chars_length.
(undetermined_equals, undetermined_hashcode): New functions.
(Search::compute_partition): Initialize the _undetermined_chars* fields of all keywords. Use a hash map instead of a loop over the equivalence classes.
(Search::find_good_asso_values): Deallocate the _undetermined_chars field.
This commit is contained in:
18
ChangeLog
18
ChangeLog
@@ -1,3 +1,21 @@
|
||||
2025-04-19 Bruno Haible <bruno@clisp.org>
|
||||
|
||||
Optimize: Use a hash table in compute_partition.
|
||||
This reduces the execution time of gperf on large inputs by ca. 30%.
|
||||
* autogen.sh (GNULIB_MODULES): Add map-c++, hash-map.
|
||||
* src/keyword.h: Include <stddef.h>.
|
||||
(struct KeywordExt): Add fields _undetermined_chars,
|
||||
_undetermined_chars_length, _undetermined_chars_hashcode.
|
||||
* src/search.cc: Include gl_map.hh, gl_hash_map.h.
|
||||
(Search::prepare_asso_values): Initialize the _undetermined_chars field.
|
||||
(struct EquivalenceClass): Remove the fields _undetermined_chars,
|
||||
_undetermined_chars_length.
|
||||
(undetermined_equals, undetermined_hashcode): New functions.
|
||||
(Search::compute_partition): Initialize the _undetermined_chars* fields
|
||||
of all keywords. Use a hash map instead of a loop over the equivalence
|
||||
classes.
|
||||
(Search::find_good_asso_values): Deallocate the _undetermined_chars field.
|
||||
|
||||
2025-04-19 Bruno Haible <bruno@clisp.org>
|
||||
|
||||
Optimize: Make Bool_Array take less memory.
|
||||
|
||||
@@ -66,6 +66,7 @@ if test $skip_gnulib = false; then
|
||||
GNULIB_MODULES='
|
||||
filename
|
||||
getopt-gnu
|
||||
map-c++ hash-map
|
||||
read-file
|
||||
package-version
|
||||
'
|
||||
|
||||
28
lib/.gitignore
vendored
28
lib/.gitignore
vendored
@@ -5,12 +5,19 @@
|
||||
/alloca.in.h
|
||||
/arg-nonnull.h
|
||||
/assert.in.h
|
||||
/attribute.h
|
||||
/basename-lgpl.c
|
||||
/basename-lgpl.h
|
||||
/c++defs.h
|
||||
/cloexec.c
|
||||
/cloexec.h
|
||||
/close.c
|
||||
/dup2.c
|
||||
/errno.in.h
|
||||
/error.c
|
||||
/error.in.h
|
||||
/exitfail.c
|
||||
/exitfail.h
|
||||
/fcntl.c
|
||||
/fcntl.in.h
|
||||
/fd-hook.c
|
||||
@@ -31,9 +38,22 @@
|
||||
/getopt.in.h
|
||||
/getopt1.c
|
||||
/getopt_int.h
|
||||
/getprogname.c
|
||||
/getprogname.h
|
||||
/gettext.h
|
||||
/gl_anyhash1.h
|
||||
/gl_anyhash2.h
|
||||
/gl_anyhash_primes.h
|
||||
/gl_hash_map.c
|
||||
/gl_hash_map.h
|
||||
/gl_map.c
|
||||
/gl_map.h
|
||||
/gl_map.hh
|
||||
/gl_xmap.c
|
||||
/gl_xmap.h
|
||||
/idx.h
|
||||
/intprops-internal.h
|
||||
/intprops.h
|
||||
/inttypes.in.h
|
||||
/limits.in.h
|
||||
/lseek.c
|
||||
@@ -51,6 +71,7 @@
|
||||
/read-file.c
|
||||
/read-file.h
|
||||
/realloc.c
|
||||
/size_max.h
|
||||
/stat-time.c
|
||||
/stat-time.h
|
||||
/stat-w32.c
|
||||
@@ -65,6 +86,9 @@
|
||||
/stdio.in.h
|
||||
/stdlib.c
|
||||
/stdlib.in.h
|
||||
/strerror-override.c
|
||||
/strerror-override.h
|
||||
/strerror.c
|
||||
/string.in.h
|
||||
/sys_stat.in.h
|
||||
/sys_types.in.h
|
||||
@@ -74,7 +98,11 @@
|
||||
/verify.h
|
||||
/warn-on-use.h
|
||||
/wchar.in.h
|
||||
/xalloc-die.c
|
||||
/xalloc-oversized.h
|
||||
/xalloc.h
|
||||
/xsize.c
|
||||
/xsize.h
|
||||
|
||||
# Files generated by the autotools:
|
||||
/aclocal.m4
|
||||
|
||||
@@ -24,6 +24,8 @@
|
||||
#ifndef keyword_h
|
||||
#define keyword_h 1
|
||||
|
||||
#include <stddef.h> /* defines size_t */
|
||||
|
||||
/* Class defined in "positions.h". */
|
||||
class Positions;
|
||||
|
||||
@@ -72,6 +74,13 @@ struct KeywordExt : public Keyword
|
||||
/* Deletes selchars. */
|
||||
void delete_selchars ();
|
||||
|
||||
/* Data members used by the algorithm, specifically compute_partition. */
|
||||
/* The undetermined selected characters for this keyword, as a
|
||||
canonically reordered multiset. */
|
||||
unsigned int * _undetermined_chars;
|
||||
unsigned int _undetermined_chars_length;
|
||||
size_t _undetermined_chars_hashcode;
|
||||
|
||||
/* Data members used by the algorithm. */
|
||||
int _hash_value; /* Hash value for the keyword. */
|
||||
|
||||
|
||||
@@ -28,7 +28,9 @@
|
||||
#include <string.h> /* declares memset(), memcmp() */
|
||||
#include <time.h> /* declares time() */
|
||||
#include <math.h> /* declares exp() */
|
||||
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */
|
||||
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX, CHAR_BIT */
|
||||
#include "gl_map.hh"
|
||||
#include "gl_hash_map.h"
|
||||
#include "options.h"
|
||||
#include "hash-table.h"
|
||||
|
||||
@@ -839,6 +841,13 @@ Search::prepare_asso_values ()
|
||||
/* Memory allocation. */
|
||||
_asso_values = new int[_alpha_size];
|
||||
|
||||
/* Memory allocation in each Keyword. */
|
||||
for (temp = _head; temp; temp = temp->rest())
|
||||
{
|
||||
KeywordExt *keyword = temp->first();
|
||||
keyword->_undetermined_chars = new unsigned int[keyword->_selchars_length];
|
||||
}
|
||||
|
||||
int non_linked_length = _list_len;
|
||||
unsigned int asso_value_max;
|
||||
|
||||
@@ -943,10 +952,6 @@ struct EquivalenceClass
|
||||
KeywordExt_List * _keywords_last;
|
||||
/* The number of keywords in this equivalence class. */
|
||||
unsigned int _cardinality;
|
||||
/* The undetermined selected characters for the keywords in this
|
||||
equivalence class, as a canonically reordered multiset. */
|
||||
unsigned int * _undetermined_chars;
|
||||
unsigned int _undetermined_chars_length;
|
||||
|
||||
EquivalenceClass * _next;
|
||||
};
|
||||
@@ -984,48 +989,78 @@ equals (const unsigned int *ptr1, const unsigned int *ptr2, unsigned int len)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Tells whether two keywords have matching undetermined characters:
   their _undetermined_chars_length fields must be equal and equals() must
   accept their _undetermined_chars arrays over that length.  The length
   test comes first, so equals() is only invoked on arrays of the same
   length.
   Passed, together with undetermined_hashcode, to the gl_Map constructor
   in Search::compute_partition (presumably as the key comparison
   callback -- confirm against the gl_map API).  */
static bool
|
||||
undetermined_equals (KeywordExt *key1, KeywordExt *key2)
|
||||
{
|
||||
return (key1->_undetermined_chars_length == key2 ->_undetermined_chars_length)
|
||||
&& equals (key1->_undetermined_chars, key2->_undetermined_chars,
|
||||
key1->_undetermined_chars_length);
|
||||
}
|
||||
|
||||
/* Returns the hash code stored in key->_undetermined_chars_hashcode.
   Nothing is computed here: Search::compute_partition fills in that field
   for every keyword before it creates the map that receives this
   function.  */
static size_t
|
||||
undetermined_hashcode (KeywordExt *key)
|
||||
{
|
||||
return key->_undetermined_chars_hashcode;
|
||||
}
|
||||
|
||||
EquivalenceClass *
|
||||
Search::compute_partition (bool *undetermined) const
|
||||
{
|
||||
EquivalenceClass *partition = NULL;
|
||||
EquivalenceClass *partition_last = NULL;
|
||||
/* Prepare the use of the hash-map: For each keyword,
|
||||
compute the undetermined characters and their hash code. */
|
||||
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||
{
|
||||
KeywordExt *keyword = temp->first();
|
||||
|
||||
/* Compute the undetermined characters for this keyword. */
|
||||
unsigned int *undetermined_chars =
|
||||
new unsigned int[keyword->_selchars_length];
|
||||
/* This scratch memory, an array of length keyword->_selchars_length,
|
||||
was allocated earlier. */
|
||||
unsigned int *undetermined_chars = keyword->_undetermined_chars;
|
||||
unsigned int undetermined_chars_length = 0;
|
||||
|
||||
for (int i = 0; i < keyword->_selchars_length; i++)
|
||||
if (undetermined[keyword->_selchars[i]])
|
||||
undetermined_chars[undetermined_chars_length++] = keyword->_selchars[i];
|
||||
keyword->_undetermined_chars_length = undetermined_chars_length;
|
||||
|
||||
{
|
||||
const int SIZE_BITS = sizeof (size_t) * CHAR_BIT;
|
||||
size_t h = undetermined_chars_length;
|
||||
for (unsigned int i = 0; i < undetermined_chars_length; i++)
|
||||
h = undetermined_chars[i] * 641 + ((h << 9) | (h >> (SIZE_BITS - 9)));
|
||||
keyword->_undetermined_chars_hashcode = h;
|
||||
}
|
||||
}
|
||||
|
||||
EquivalenceClass *partition = NULL;
|
||||
EquivalenceClass *partition_last = NULL;
|
||||
/* A hash-map that maps each keyword to the EquivalenceClass that contains
|
||||
it. */
|
||||
gl_Map<KeywordExt *, EquivalenceClass const *>
|
||||
map (GL_HASH_MAP, undetermined_equals, undetermined_hashcode, NULL, NULL);
|
||||
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||
{
|
||||
KeywordExt *keyword = temp->first();
|
||||
|
||||
/* Look up the equivalence class to which this keyword belongs. */
|
||||
EquivalenceClass *equclass;
|
||||
for (equclass = partition; equclass; equclass = equclass->_next)
|
||||
if (equclass->_undetermined_chars_length == undetermined_chars_length
|
||||
&& equals (equclass->_undetermined_chars, undetermined_chars,
|
||||
undetermined_chars_length))
|
||||
break;
|
||||
EquivalenceClass *equclass = const_cast<EquivalenceClass *>(map.get(keyword));
|
||||
if (equclass == NULL)
|
||||
{
|
||||
equclass = new EquivalenceClass();
|
||||
equclass->_keywords = NULL;
|
||||
equclass->_keywords_last = NULL;
|
||||
equclass->_cardinality = 0;
|
||||
equclass->_undetermined_chars = undetermined_chars;
|
||||
equclass->_undetermined_chars_length = undetermined_chars_length;
|
||||
equclass->_next = NULL;
|
||||
|
||||
/* Map this keyword (and all equivalent ones that will be seen later)
|
||||
to equclass. */
|
||||
map.put(keyword, equclass);
|
||||
|
||||
if (partition)
|
||||
partition_last->_next = equclass;
|
||||
else
|
||||
partition = equclass;
|
||||
partition_last = equclass;
|
||||
}
|
||||
else
|
||||
delete[] undetermined_chars;
|
||||
|
||||
/* Add the keyword to the equivalence class. */
|
||||
KeywordExt_List *cons = new KeywordExt_List(keyword);
|
||||
@@ -1037,10 +1072,6 @@ Search::compute_partition (bool *undetermined) const
|
||||
equclass->_cardinality++;
|
||||
}
|
||||
|
||||
/* Free some of the allocated memory. The caller doesn't need it. */
|
||||
for (EquivalenceClass *cls = partition; cls; cls = cls->_next)
|
||||
delete[] cls->_undetermined_chars;
|
||||
|
||||
return partition;
|
||||
}
|
||||
|
||||
@@ -1052,7 +1083,6 @@ delete_partition (EquivalenceClass *partition)
|
||||
EquivalenceClass *equclass = partition;
|
||||
partition = equclass->_next;
|
||||
delete_list (equclass->_keywords);
|
||||
//delete[] equclass->_undetermined_chars; // already freed above
|
||||
delete equclass;
|
||||
}
|
||||
}
|
||||
@@ -1572,6 +1602,13 @@ Search::find_good_asso_values ()
|
||||
delete[] best_asso_values;
|
||||
/* The keywords' _hash_value fields are recomputed below. */
|
||||
}
|
||||
|
||||
/* Memory deallocation in each Keyword. */
|
||||
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||
{
|
||||
KeywordExt *keyword = temp->first();
|
||||
delete[] keyword->_undetermined_chars;
|
||||
}
|
||||
}
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
Reference in New Issue
Block a user