mirror of
https://git.savannah.gnu.org/git/gperf.git
synced 2025-12-02 13:09:22 +00:00
Optimize: Use a hash table in compute_partition.
This reduces the execution time of gperf on large inputs by ca. 30%.

* autogen.sh (GNULIB_MODULES): Add map-c++, hash-map.
* src/keyword.h: Include <stddef.h>.
(struct KeywordExt): Add fields _undetermined_chars, _undetermined_chars_length, _undetermined_chars_hashcode.
* src/search.cc: Include gl_map.hh, gl_hash_map.h.
(Search::prepare_asso_values): Initialize the _undetermined_chars field.
(struct EquivalenceClass): Remove the fields _undetermined_chars, _undetermined_chars_length.
(undetermined_equals, undetermined_hashcode): New functions.
(Search::compute_partition): Initialize the _undetermined_chars* fields of all keywords. Use a hash map instead of a loop over the equivalence classes.
(Search::find_good_asso_values): Deallocate the _undetermined_chars field.
This commit is contained in:
18
ChangeLog
18
ChangeLog
@@ -1,3 +1,21 @@
|
|||||||
|
2025-04-19 Bruno Haible <bruno@clisp.org>
|
||||||
|
|
||||||
|
Optimize: Use a hash table in compute_partition.
|
||||||
|
This reduces the execution time of gperf on large inputs by ca. 30%.
|
||||||
|
* autogen.sh (GNULIB_MODULES): Add map-c++, hash-map.
|
||||||
|
* src/keyword.h: Include <stddef.h>.
|
||||||
|
(struct KeywordExt): Add fields _undetermined_chars,
|
||||||
|
_undetermined_chars_length, _undetermined_chars_hashcode.
|
||||||
|
* src/search.cc: Include gl_map.hh, gl_hash_map.h.
|
||||||
|
(Search::prepare_asso_values): Initialize the _undetermined_chars field.
|
||||||
|
(struct EquivalenceClass): Remove the fields _undetermined_chars,
|
||||||
|
_undetermined_chars_length.
|
||||||
|
(undetermined_equals, undetermined_hashcode): New functions.
|
||||||
|
(Search::compute_partition): Initialize the _undetermined_chars* fields
|
||||||
|
of all keywords. Use a hash map instead of a loop over the equivalence
|
||||||
|
classes.
|
||||||
|
(Search::find_good_asso_values): Deallocate the _undetermined_chars field.
|
||||||
|
|
||||||
2025-04-19 Bruno Haible <bruno@clisp.org>
|
2025-04-19 Bruno Haible <bruno@clisp.org>
|
||||||
|
|
||||||
Optimize: Make Bool_Array take less memory.
|
Optimize: Make Bool_Array take less memory.
|
||||||
|
|||||||
@@ -66,6 +66,7 @@ if test $skip_gnulib = false; then
|
|||||||
GNULIB_MODULES='
|
GNULIB_MODULES='
|
||||||
filename
|
filename
|
||||||
getopt-gnu
|
getopt-gnu
|
||||||
|
map-c++ hash-map
|
||||||
read-file
|
read-file
|
||||||
package-version
|
package-version
|
||||||
'
|
'
|
||||||
|
|||||||
28
lib/.gitignore
vendored
28
lib/.gitignore
vendored
@@ -5,12 +5,19 @@
|
|||||||
/alloca.in.h
|
/alloca.in.h
|
||||||
/arg-nonnull.h
|
/arg-nonnull.h
|
||||||
/assert.in.h
|
/assert.in.h
|
||||||
|
/attribute.h
|
||||||
|
/basename-lgpl.c
|
||||||
|
/basename-lgpl.h
|
||||||
/c++defs.h
|
/c++defs.h
|
||||||
/cloexec.c
|
/cloexec.c
|
||||||
/cloexec.h
|
/cloexec.h
|
||||||
/close.c
|
/close.c
|
||||||
/dup2.c
|
/dup2.c
|
||||||
/errno.in.h
|
/errno.in.h
|
||||||
|
/error.c
|
||||||
|
/error.in.h
|
||||||
|
/exitfail.c
|
||||||
|
/exitfail.h
|
||||||
/fcntl.c
|
/fcntl.c
|
||||||
/fcntl.in.h
|
/fcntl.in.h
|
||||||
/fd-hook.c
|
/fd-hook.c
|
||||||
@@ -31,9 +38,22 @@
|
|||||||
/getopt.in.h
|
/getopt.in.h
|
||||||
/getopt1.c
|
/getopt1.c
|
||||||
/getopt_int.h
|
/getopt_int.h
|
||||||
|
/getprogname.c
|
||||||
|
/getprogname.h
|
||||||
/gettext.h
|
/gettext.h
|
||||||
|
/gl_anyhash1.h
|
||||||
|
/gl_anyhash2.h
|
||||||
|
/gl_anyhash_primes.h
|
||||||
|
/gl_hash_map.c
|
||||||
|
/gl_hash_map.h
|
||||||
|
/gl_map.c
|
||||||
|
/gl_map.h
|
||||||
|
/gl_map.hh
|
||||||
|
/gl_xmap.c
|
||||||
|
/gl_xmap.h
|
||||||
/idx.h
|
/idx.h
|
||||||
/intprops-internal.h
|
/intprops-internal.h
|
||||||
|
/intprops.h
|
||||||
/inttypes.in.h
|
/inttypes.in.h
|
||||||
/limits.in.h
|
/limits.in.h
|
||||||
/lseek.c
|
/lseek.c
|
||||||
@@ -51,6 +71,7 @@
|
|||||||
/read-file.c
|
/read-file.c
|
||||||
/read-file.h
|
/read-file.h
|
||||||
/realloc.c
|
/realloc.c
|
||||||
|
/size_max.h
|
||||||
/stat-time.c
|
/stat-time.c
|
||||||
/stat-time.h
|
/stat-time.h
|
||||||
/stat-w32.c
|
/stat-w32.c
|
||||||
@@ -65,6 +86,9 @@
|
|||||||
/stdio.in.h
|
/stdio.in.h
|
||||||
/stdlib.c
|
/stdlib.c
|
||||||
/stdlib.in.h
|
/stdlib.in.h
|
||||||
|
/strerror-override.c
|
||||||
|
/strerror-override.h
|
||||||
|
/strerror.c
|
||||||
/string.in.h
|
/string.in.h
|
||||||
/sys_stat.in.h
|
/sys_stat.in.h
|
||||||
/sys_types.in.h
|
/sys_types.in.h
|
||||||
@@ -74,7 +98,11 @@
|
|||||||
/verify.h
|
/verify.h
|
||||||
/warn-on-use.h
|
/warn-on-use.h
|
||||||
/wchar.in.h
|
/wchar.in.h
|
||||||
|
/xalloc-die.c
|
||||||
/xalloc-oversized.h
|
/xalloc-oversized.h
|
||||||
|
/xalloc.h
|
||||||
|
/xsize.c
|
||||||
|
/xsize.h
|
||||||
|
|
||||||
# Files generated by the autotools:
|
# Files generated by the autotools:
|
||||||
/aclocal.m4
|
/aclocal.m4
|
||||||
|
|||||||
@@ -24,6 +24,8 @@
|
|||||||
#ifndef keyword_h
|
#ifndef keyword_h
|
||||||
#define keyword_h 1
|
#define keyword_h 1
|
||||||
|
|
||||||
|
#include <stddef.h> /* defines size_t */
|
||||||
|
|
||||||
/* Class defined in "positions.h". */
|
/* Class defined in "positions.h". */
|
||||||
class Positions;
|
class Positions;
|
||||||
|
|
||||||
@@ -72,6 +74,13 @@ struct KeywordExt : public Keyword
|
|||||||
/* Deletes selchars. */
|
/* Deletes selchars. */
|
||||||
void delete_selchars ();
|
void delete_selchars ();
|
||||||
|
|
||||||
|
/* Data members used by the algorithm, specifically compute_partition. */
|
||||||
|
/* The undetermined selected characters for this keyword, as a
|
||||||
|
canonically reordered multiset. */
|
||||||
|
unsigned int * _undetermined_chars;
|
||||||
|
unsigned int _undetermined_chars_length;
|
||||||
|
size_t _undetermined_chars_hashcode;
|
||||||
|
|
||||||
/* Data members used by the algorithm. */
|
/* Data members used by the algorithm. */
|
||||||
int _hash_value; /* Hash value for the keyword. */
|
int _hash_value; /* Hash value for the keyword. */
|
||||||
|
|
||||||
|
|||||||
@@ -28,7 +28,9 @@
|
|||||||
#include <string.h> /* declares memset(), memcmp() */
|
#include <string.h> /* declares memset(), memcmp() */
|
||||||
#include <time.h> /* declares time() */
|
#include <time.h> /* declares time() */
|
||||||
#include <math.h> /* declares exp() */
|
#include <math.h> /* declares exp() */
|
||||||
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */
|
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX, CHAR_BIT */
|
||||||
|
#include "gl_map.hh"
|
||||||
|
#include "gl_hash_map.h"
|
||||||
#include "options.h"
|
#include "options.h"
|
||||||
#include "hash-table.h"
|
#include "hash-table.h"
|
||||||
|
|
||||||
@@ -839,6 +841,13 @@ Search::prepare_asso_values ()
|
|||||||
/* Memory allocation. */
|
/* Memory allocation. */
|
||||||
_asso_values = new int[_alpha_size];
|
_asso_values = new int[_alpha_size];
|
||||||
|
|
||||||
|
/* Memory allocation in each Keyword. */
|
||||||
|
for (temp = _head; temp; temp = temp->rest())
|
||||||
|
{
|
||||||
|
KeywordExt *keyword = temp->first();
|
||||||
|
keyword->_undetermined_chars = new unsigned int[keyword->_selchars_length];
|
||||||
|
}
|
||||||
|
|
||||||
int non_linked_length = _list_len;
|
int non_linked_length = _list_len;
|
||||||
unsigned int asso_value_max;
|
unsigned int asso_value_max;
|
||||||
|
|
||||||
@@ -943,10 +952,6 @@ struct EquivalenceClass
|
|||||||
KeywordExt_List * _keywords_last;
|
KeywordExt_List * _keywords_last;
|
||||||
/* The number of keywords in this equivalence class. */
|
/* The number of keywords in this equivalence class. */
|
||||||
unsigned int _cardinality;
|
unsigned int _cardinality;
|
||||||
/* The undetermined selected characters for the keywords in this
|
|
||||||
equivalence class, as a canonically reordered multiset. */
|
|
||||||
unsigned int * _undetermined_chars;
|
|
||||||
unsigned int _undetermined_chars_length;
|
|
||||||
|
|
||||||
EquivalenceClass * _next;
|
EquivalenceClass * _next;
|
||||||
};
|
};
|
||||||
@@ -984,48 +989,78 @@ equals (const unsigned int *ptr1, const unsigned int *ptr2, unsigned int len)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
undetermined_equals (KeywordExt *key1, KeywordExt *key2)
|
||||||
|
{
|
||||||
|
return (key1->_undetermined_chars_length == key2 ->_undetermined_chars_length)
|
||||||
|
&& equals (key1->_undetermined_chars, key2->_undetermined_chars,
|
||||||
|
key1->_undetermined_chars_length);
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
undetermined_hashcode (KeywordExt *key)
|
||||||
|
{
|
||||||
|
return key->_undetermined_chars_hashcode;
|
||||||
|
}
|
||||||
|
|
||||||
EquivalenceClass *
|
EquivalenceClass *
|
||||||
Search::compute_partition (bool *undetermined) const
|
Search::compute_partition (bool *undetermined) const
|
||||||
{
|
{
|
||||||
EquivalenceClass *partition = NULL;
|
/* Prepare the use of the hash-map: For each keyword,
|
||||||
EquivalenceClass *partition_last = NULL;
|
compute the undetermined characters and their hash code. */
|
||||||
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||||
{
|
{
|
||||||
KeywordExt *keyword = temp->first();
|
KeywordExt *keyword = temp->first();
|
||||||
|
|
||||||
/* Compute the undetermined characters for this keyword. */
|
/* This scratch memory, an array of length keyword->_selchars_length,
|
||||||
unsigned int *undetermined_chars =
|
was allocated earlier. */
|
||||||
new unsigned int[keyword->_selchars_length];
|
unsigned int *undetermined_chars = keyword->_undetermined_chars;
|
||||||
unsigned int undetermined_chars_length = 0;
|
unsigned int undetermined_chars_length = 0;
|
||||||
|
|
||||||
for (int i = 0; i < keyword->_selchars_length; i++)
|
for (int i = 0; i < keyword->_selchars_length; i++)
|
||||||
if (undetermined[keyword->_selchars[i]])
|
if (undetermined[keyword->_selchars[i]])
|
||||||
undetermined_chars[undetermined_chars_length++] = keyword->_selchars[i];
|
undetermined_chars[undetermined_chars_length++] = keyword->_selchars[i];
|
||||||
|
keyword->_undetermined_chars_length = undetermined_chars_length;
|
||||||
|
|
||||||
|
{
|
||||||
|
const int SIZE_BITS = sizeof (size_t) * CHAR_BIT;
|
||||||
|
size_t h = undetermined_chars_length;
|
||||||
|
for (unsigned int i = 0; i < undetermined_chars_length; i++)
|
||||||
|
h = undetermined_chars[i] * 641 + ((h << 9) | (h >> (SIZE_BITS - 9)));
|
||||||
|
keyword->_undetermined_chars_hashcode = h;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
EquivalenceClass *partition = NULL;
|
||||||
|
EquivalenceClass *partition_last = NULL;
|
||||||
|
/* A hash-map that maps each keyword to the EquivalenceClass that contains
|
||||||
|
it. */
|
||||||
|
gl_Map<KeywordExt *, EquivalenceClass const *>
|
||||||
|
map (GL_HASH_MAP, undetermined_equals, undetermined_hashcode, NULL, NULL);
|
||||||
|
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||||
|
{
|
||||||
|
KeywordExt *keyword = temp->first();
|
||||||
|
|
||||||
/* Look up the equivalence class to which this keyword belongs. */
|
/* Look up the equivalence class to which this keyword belongs. */
|
||||||
EquivalenceClass *equclass;
|
EquivalenceClass *equclass = const_cast<EquivalenceClass *>(map.get(keyword));
|
||||||
for (equclass = partition; equclass; equclass = equclass->_next)
|
|
||||||
if (equclass->_undetermined_chars_length == undetermined_chars_length
|
|
||||||
&& equals (equclass->_undetermined_chars, undetermined_chars,
|
|
||||||
undetermined_chars_length))
|
|
||||||
break;
|
|
||||||
if (equclass == NULL)
|
if (equclass == NULL)
|
||||||
{
|
{
|
||||||
equclass = new EquivalenceClass();
|
equclass = new EquivalenceClass();
|
||||||
equclass->_keywords = NULL;
|
equclass->_keywords = NULL;
|
||||||
equclass->_keywords_last = NULL;
|
equclass->_keywords_last = NULL;
|
||||||
equclass->_cardinality = 0;
|
equclass->_cardinality = 0;
|
||||||
equclass->_undetermined_chars = undetermined_chars;
|
|
||||||
equclass->_undetermined_chars_length = undetermined_chars_length;
|
|
||||||
equclass->_next = NULL;
|
equclass->_next = NULL;
|
||||||
|
|
||||||
|
/* Map this keyword (and all equivalent ones that will be seen later)
|
||||||
|
to equclass. */
|
||||||
|
map.put(keyword, equclass);
|
||||||
|
|
||||||
if (partition)
|
if (partition)
|
||||||
partition_last->_next = equclass;
|
partition_last->_next = equclass;
|
||||||
else
|
else
|
||||||
partition = equclass;
|
partition = equclass;
|
||||||
partition_last = equclass;
|
partition_last = equclass;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
delete[] undetermined_chars;
|
|
||||||
|
|
||||||
/* Add the keyword to the equivalence class. */
|
/* Add the keyword to the equivalence class. */
|
||||||
KeywordExt_List *cons = new KeywordExt_List(keyword);
|
KeywordExt_List *cons = new KeywordExt_List(keyword);
|
||||||
@@ -1037,10 +1072,6 @@ Search::compute_partition (bool *undetermined) const
|
|||||||
equclass->_cardinality++;
|
equclass->_cardinality++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Free some of the allocated memory. The caller doesn't need it. */
|
|
||||||
for (EquivalenceClass *cls = partition; cls; cls = cls->_next)
|
|
||||||
delete[] cls->_undetermined_chars;
|
|
||||||
|
|
||||||
return partition;
|
return partition;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1052,7 +1083,6 @@ delete_partition (EquivalenceClass *partition)
|
|||||||
EquivalenceClass *equclass = partition;
|
EquivalenceClass *equclass = partition;
|
||||||
partition = equclass->_next;
|
partition = equclass->_next;
|
||||||
delete_list (equclass->_keywords);
|
delete_list (equclass->_keywords);
|
||||||
//delete[] equclass->_undetermined_chars; // already freed above
|
|
||||||
delete equclass;
|
delete equclass;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1572,6 +1602,13 @@ Search::find_good_asso_values ()
|
|||||||
delete[] best_asso_values;
|
delete[] best_asso_values;
|
||||||
/* The keywords' _hash_value fields are recomputed below. */
|
/* The keywords' _hash_value fields are recomputed below. */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Memory deallocation in each Keyword. */
|
||||||
|
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||||
|
{
|
||||||
|
KeywordExt *keyword = temp->first();
|
||||||
|
delete[] keyword->_undetermined_chars;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ========================================================================= */
|
/* ========================================================================= */
|
||||||
|
|||||||
Reference in New Issue
Block a user