[LyX/2.4.x] Remove performance bottleneck in getAuthors()
Juergen Spitzmueller
spitz at lyx.org
Fri Jul 5 16:57:55 UTC 2024
commit 0f35e3141bc5b6baf2eb44bc63a34b5427db2cd5
Author: Juergen Spitzmueller <spitz at lyx.org>
Date: Fri Jul 5 14:05:26 2024 +0200
Remove performance bottleneck in getAuthors()
The regex is expensive, which is especially noticeable with very long
author lists.
This introduces a case-insensitive subst() variant, which is much faster.
(cherry picked from commit 8ba74fe9589fca3b00134e4d4f1fc130ad960c69)
---
src/BiblioInfo.cpp | 7 +------
src/support/lstrings.cpp | 26 +++++++++++++++++++-------
src/support/lstrings.h | 3 ++-
3 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp
index 253fb3759c..e2ea4bde54 100644
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@@ -279,12 +279,7 @@ vector<docstring> const getAuthors(docstring const & author)
// Then, we temporarily make all " and " strings to ampersands in order
// to handle them later on a per-char level. Note that arbitrary casing
// ("And", "AND", "aNd", ...) is allowed in bibtex (#10465).
- static regex const and_reg("(.* )([aA][nN][dD])( .*)");
- smatch sub;
- string res = to_utf8(iname);
- while (regex_match(res, sub, and_reg))
- res = sub.str(1) + "&" + sub.str(3);
- iname = from_utf8(res);
+ iname = subst(iname, from_ascii(" and "), from_ascii(" & "), false);
// Now we traverse through the string and replace the "&" by the proper
// output in- and outside groups
docstring name;
diff --git a/src/support/lstrings.cpp b/src/support/lstrings.cpp
index 61ea5bf36a..600885f80c 100644
--- a/src/support/lstrings.cpp
+++ b/src/support/lstrings.cpp
@@ -913,16 +913,27 @@ String const subst_string(String const & a,
docstring const subst_string(docstring const & a,
- docstring const & oldstr, docstring const & newstr)
+ docstring const & oldstr, docstring const & newstr,
+ bool const case_sens)
{
LASSERT(!oldstr.empty(), return a);
docstring lstr = a;
size_t i = 0;
size_t const olen = oldstr.length();
- while ((i = lstr.find(oldstr, i)) != string::npos) {
- lstr.replace(i, olen, newstr);
- i += newstr.length(); // We need to be sure that we don't
- // use the same i over and over again.
+ if (case_sens)
+ while ((i = lstr.find(oldstr, i)) != string::npos) {
+ lstr.replace(i, olen, newstr);
+ i += newstr.length(); // We need to be sure that we don't
+ // use the same i over and over again.
+ }
+ else {
+ docstring lcstr = lowercase(lstr);
+ while ((i = lcstr.find(oldstr, i)) != string::npos) {
+ lstr.replace(i, olen, newstr);
+ i += newstr.length(); // We need to be sure that we don't
+ // use the same i over and over again.
+ lcstr = lowercase(lstr);
+ }
}
return lstr;
}
@@ -951,9 +962,10 @@ string const subst(string const & a,
docstring const subst(docstring const & a,
- docstring const & oldstr, docstring const & newstr)
+ docstring const & oldstr, docstring const & newstr,
+ bool case_sens)
{
- return subst_string(a, oldstr, newstr);
+ return subst_string(a, oldstr, newstr, case_sens);
}
diff --git a/src/support/lstrings.h b/src/support/lstrings.h
index 390d29c66a..b406f30994 100644
--- a/src/support/lstrings.h
+++ b/src/support/lstrings.h
@@ -196,7 +196,8 @@ std::string const subst(std::string const & a,
/// substitutes all instances of \a oldstr with \a newstr
docstring const subst(docstring const & a,
- docstring const & oldstr, docstring const & newstr);
+ docstring const & oldstr, docstring const & newstr,
+ bool case_sens = true);
/// Count all occurrences of char \a chr inside \a str
int count_char(std::string const & str, char chr);
More information about the lyx-cvs
mailing list