From 70899c856b224f417dc33cac58ac36a793f056dc Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sat, 19 Apr 2025 19:57:12 +0200 Subject: [PATCH] Optimize: Test large equivalence classes for conflict first. This reduces the execution time of gperf on large inputs by ca. 10%. * src/search.cc (cmp_equiv_classes): New function. (compute_partition): Sort the equivalence classes according to decreasing size. --- ChangeLog | 8 ++++++++ src/search.cc | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/ChangeLog b/ChangeLog index 6437545..8358001 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2025-04-19 Bruno Haible + + Optimize: Test large equivalence classes for conflict first. + This reduces the execution time of gperf on large inputs by ca. 10%. + * src/search.cc (cmp_equiv_classes): New function. + (compute_partition): Sort the equivalence classes according to + decreasing size. + 2025-04-19 Bruno Haible Refactor: Use an array-list instead of a linked-list of equiv.-classes. diff --git a/src/search.cc b/src/search.cc index dc3d0f3..711b15b 100644 --- a/src/search.cc +++ b/src/search.cc @@ -1011,6 +1011,19 @@ undetermined_hashcode (KeywordExt *key) return key->_undetermined_chars_hashcode; } +/* Compares the equivalence classes cls1, cls2 pointed to by ptr1 and ptr2 + and returns + < 0 if cls1 is larger than cls2, + 0 if cls1 and cls2 have the same size, + > 0 if cls1 is smaller than cls2. */ +static int +cmp_equiv_classes (void const *ptr1, void const *ptr2) +{ + EquivalenceClass const *cls1 = static_cast<EquivalenceClass const *>(ptr1); + EquivalenceClass const *cls2 = static_cast<EquivalenceClass const *>(ptr2); + return _GL_CMP (cls2->_keywords.size(), cls1->_keywords.size()); +} + Partition * Search::compute_partition (bool *undetermined) const { @@ -1067,6 +1080,15 @@ Search::compute_partition (bool *undetermined) const } } + /* Sort the equivalence classes according to decreasing size. 
+ This results in a speedup of find_asso_values, because on average, a large + equivalence class has a higher probability for a collision than a smaller + equivalence class. (An equivalence class with just 1 keyword never has + a collision.) */ + if (partition->_equclasses.size() > 1) + qsort (&partition->_equclasses.get_at(0), partition->_equclasses.size(), + sizeof (EquivalenceClass), cmp_equiv_classes); + return partition; }