[LyX/2.4.x] Remove performance bottleneck in getAuthors()

Juergen Spitzmueller spitz at lyx.org
Fri Jul 5 16:57:55 UTC 2024


commit 0f35e3141bc5b6baf2eb44bc63a34b5427db2cd5
Author: Juergen Spitzmueller <spitz at lyx.org>
Date:   Fri Jul 5 14:05:26 2024 +0200

    Remove performance bottleneck in getAuthors()
    
    The regex is expensive, which is especially noticeable with very long
    author lists.
    
    This introduces a case-insensitive subst() variant, which is much faster
    than the regex.
    
    (cherry picked from commit 8ba74fe9589fca3b00134e4d4f1fc130ad960c69)
---
 src/BiblioInfo.cpp       |  7 +------
 src/support/lstrings.cpp | 26 +++++++++++++++++++-------
 src/support/lstrings.h   |  3 ++-
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp
index 253fb3759c..e2ea4bde54 100644
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@@ -279,12 +279,7 @@ vector<docstring> const getAuthors(docstring const & author)
 	// Then, we temporarily make all " and " strings to ampersands in order
 	// to handle them later on a per-char level. Note that arbitrary casing
 	// ("And", "AND", "aNd", ...) is allowed in bibtex (#10465).
-	static regex const and_reg("(.* )([aA][nN][dD])( .*)");
-	smatch sub;
-	string res = to_utf8(iname);
-	while (regex_match(res, sub, and_reg))
-		res = sub.str(1) + "&" + sub.str(3);
-	iname = from_utf8(res);
+	iname = subst(iname, from_ascii(" and "), from_ascii(" & "), false);
 	// Now we traverse through the string and replace the "&" by the proper
 	// output in- and outside groups
 	docstring name;
diff --git a/src/support/lstrings.cpp b/src/support/lstrings.cpp
index 61ea5bf36a..600885f80c 100644
--- a/src/support/lstrings.cpp
+++ b/src/support/lstrings.cpp
@@ -913,16 +913,27 @@ String const subst_string(String const & a,
 
 
 docstring const subst_string(docstring const & a,
-		docstring const & oldstr, docstring const & newstr)
+		docstring const & oldstr, docstring const & newstr,
+		bool const case_sens)
 {
 	LASSERT(!oldstr.empty(), return a);
 	docstring lstr = a;
 	size_t i = 0;
 	size_t const olen = oldstr.length();
-	while ((i = lstr.find(oldstr, i)) != string::npos) {
-		lstr.replace(i, olen, newstr);
-		i += newstr.length(); // We need to be sure that we don't
-		// use the same i over and over again.
+	if (case_sens)
+		while ((i = lstr.find(oldstr, i)) != string::npos) {
+			lstr.replace(i, olen, newstr);
+			i += newstr.length(); // We need to be sure that we don't
+			// use the same i over and over again.
+		}
+	else {
+		docstring lcstr = lowercase(lstr);
+		while ((i = lcstr.find(oldstr, i)) != string::npos) {
+			lstr.replace(i, olen, newstr);
+			i += newstr.length(); // We need to be sure that we don't
+			// use the same i over and over again.
+			lcstr = lowercase(lstr);
+		}
 	}
 	return lstr;
 }
@@ -951,9 +962,10 @@ string const subst(string const & a,
 
 
 docstring const subst(docstring const & a,
-		docstring const & oldstr, docstring const & newstr)
+		docstring const & oldstr, docstring const & newstr,
+		bool case_sens)
 {
-	return subst_string(a, oldstr, newstr);
+	return subst_string(a, oldstr, newstr, case_sens);
 }
 
 
diff --git a/src/support/lstrings.h b/src/support/lstrings.h
index 390d29c66a..b406f30994 100644
--- a/src/support/lstrings.h
+++ b/src/support/lstrings.h
@@ -196,7 +196,8 @@ std::string const subst(std::string const & a,
 
 /// substitutes all instances of \a oldstr with \a newstr
 docstring const subst(docstring const & a,
-		docstring const & oldstr, docstring const & newstr);
+		docstring const & oldstr, docstring const & newstr,
+		bool case_sens = true);
 
 /// Count all occurrences of char \a chr inside \a str
 int count_char(std::string const & str, char chr);


More information about the lyx-cvs mailing list