[LyX/master] DocBook: when a similar tag is present multiple times in the bibliography entry, output it only once.

Thibaut Cuvelier tcuvelier at lyx.org
Mon Feb 26 15:10:02 UTC 2024


commit 8e0bc01d50fc7d7af454cb8bed4d098f85b3f73b
Author: Thibaut Cuvelier <tcuvelier at lyx.org>
Date:   Mon Feb 26 16:04:22 2024 +0100

    DocBook: when a similar tag is present multiple times in the bibliography entry, output it only once.
    
    Other copies are not output as elements; each one is replaced by an XML comment reporting the duplicate tag and its value.
---
 autotests/export/docbook/basic.xml | 32 +++------------
 src/insets/InsetBibtex.cpp         | 80 ++++++++++++++++++++++++++++++--------
 2 files changed, 68 insertions(+), 44 deletions(-)

diff --git a/autotests/export/docbook/basic.xml b/autotests/export/docbook/basic.xml
index 87c951f359..c03a26c12d 100644
--- a/autotests/export/docbook/basic.xml
+++ b/autotests/export/docbook/basic.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<!-- This DocBook file was created by LyX 2.4.0dev
+<!-- This DocBook file was created by LyX 2.4.0~devel
   See https://www.lyx.org/ for more information -->
 <article xml:lang="en-US" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:xi="http://www.w3.org/2001/XInclude" version="5.2">
 <info>
@@ -67,30 +67,8 @@
 <m:mtable displaystyle='true'>
  <m:mtr>
   <m:mtd>
-   <m:mstyle mathvariant='normal'>
-    <m:mrow>
-     <m:mi>I</m:mi> 
-     <m:mi>a</m:mi>
-     <m:mi>m</m:mi> 
-     <m:mi>a</m:mi> 
-     <m:mi>f</m:mi>
-     <m:mi>o</m:mi>
-     <m:mi>r</m:mi>
-     <m:mi>m</m:mi>
-     <m:mi>u</m:mi>
-     <m:mi>l</m:mi>
-     <m:mi>a</m:mi> 
-     <m:mi>w</m:mi>
-     <m:mi>i</m:mi>
-     <m:mi>t</m:mi>
-     <m:mi>h</m:mi> 
-     <m:mi>a</m:mi> 
-     <m:mi>r</m:mi>
-     <m:mi>e</m:mi>
-     <m:mi>f</m:mi>
-     <m:mn>.</m:mn>
-    </m:mrow>
-   </m:mstyle>
+   <m:mtext>I am a formula with a ref
+   <m:mn>.</m:mn></m:mtext>
   </m:mtd>
   <m:mtd>
    <m:mtext>(1)</m:mtext>
@@ -285,8 +263,8 @@ I am no more code. </para>
 </section>
 <section>
 <title>I am the sixth section and I really like bibliographies</title>
-<para>This text has references. First reference: <biblioref endterm="big" />. Second reference: <biblioref endterm="small" />. Both at the same time: <biblioref endterm="big" />, <biblioref endterm="small" />. A book: <biblioref endterm="Gro60" />. </para>
-<para>Many things, just testing for completeness: <biblioref endterm="article" />, <biblioref endterm="book" />, <biblioref endterm="booklet" />, <biblioref endterm="conference" />, <biblioref endterm="inbook" />, <biblioref endterm="incollection" />. </para>
+<para>This text has references. First reference: <biblioref linkend="big" />. Second reference: <biblioref linkend="small" />. Both at the same time: <biblioref linkend="big" />, <biblioref linkend="small" />. A book: <biblioref linkend="Gro60" />. </para>
+<para>Many things, just testing for completeness: <biblioref linkend="article" />, <biblioref linkend="book" />, <biblioref linkend="booklet" />, <biblioref linkend="conference" />, <biblioref linkend="inbook" />, <biblioref linkend="incollection" />. </para>
 </section>
 <section>
 <title>I am the seventh section and I deal with indices</title>
diff --git a/src/insets/InsetBibtex.cpp b/src/insets/InsetBibtex.cpp
index 6d38631f81..8a2a697648 100644
--- a/src/insets/InsetBibtex.cpp
+++ b/src/insets/InsetBibtex.cpp
@@ -1128,7 +1128,6 @@ void InsetBibtex::docbook(XMLStream & xs, OutputParams const &) const
 	        make_pair("url", "uri")
 	};
 	// Relations between documents.
-	// TODO: some elements should be mutually exclusive; right now, all of them are output.
 	vector<pair<string, string>> relations = { // <bibtex, docbook biblioset relation>
 	        make_pair("journal", "journal"),
 	        make_pair("journaltitle", "journal"),
@@ -1149,7 +1148,6 @@ void InsetBibtex::docbook(XMLStream & xs, OutputParams const &) const
 	toDocBookTag["fullbynames:editor"] = "SPECIFIC";  // No direct translation to DocBook: <editor><personname/orgname>.
 	toDocBookTag["institution"] = "SPECIFIC"; // No direct translation to DocBook: <org>.
 
-	// TODO: some elements should be mutually exclusive; right now, all of them are output.
 	toDocBookTag["title"] = "title";
 	toDocBookTag["fulltitle"] = "title";
 	toDocBookTag["quotetitle"] = "title";
@@ -1162,7 +1160,6 @@ void InsetBibtex::docbook(XMLStream & xs, OutputParams const &) const
 	toDocBookTag["year"] = "SPECIFIC"; // No direct translation to DocBook: <pubdate>.
 	toDocBookTag["month"] = "SPECIFIC"; // No direct translation to DocBook: <pubdate>.
 
-	// TODO: some elements should be mutually exclusive; right now, all of them are output.
 	toDocBookTag["journal"] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
 	toDocBookTag["journaltitle"] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
 	toDocBookTag["fulljournaltitle"] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
@@ -1210,22 +1207,35 @@ void InsetBibtex::docbook(XMLStream & xs, OutputParams const &) const
 		map<string, string> delayedTags;
 
 		// Read all tags from HTML and convert those that have a 1:1 matching.
+		// Avoid outputting the same tag twice in DocBook: several bibliography tags might map to the same DocBook
+		// element, avoid outputting the same DocBook tag twice to keep a valid output. "SPECIFIC" tags are handled in
+		// a more specific way later on (among the delayed tags).
+		set<string> alreadyOutputDocBookTags;
 		while (tagIt != tagEnd) {
 			string tag = tagIt->str(); // regex_match cannot work with temporary strings.
 			++tagIt;
 
 			if (regex_match(tag, match, tagRegex)) {
-				if (toDocBookTag[match[1]] == "SPECIFIC") {
+				const string docbookTag = toDocBookTag[match[1]];
+				if (docbookTag == "SPECIFIC") {
 					delayedTags[match[1]] = match[2];
 				} else {
-					xs << xml::StartTag(toDocBookTag[match[1]]);
-					xs << from_utf8(match[2].str());
-					xs << xml::EndTag(toDocBookTag[match[1]]);
-					xs << xml::CR();
+					if (alreadyOutputDocBookTags.contains(docbookTag)) {
+						xs << XMLStream::ESCAPE_NONE <<
+						   from_utf8("<!-- Several similar tags in the reference for ") + from_utf8(docbookTag) +
+						   from_utf8(". New tag: ") + from_utf8(match[1]) + from_utf8(". Corresponding value: ") +
+						   from_utf8(match[2].str()) + from_utf8(" -->\n");
+					} else {
+						xs << xml::StartTag(docbookTag);
+						xs << from_utf8(match[2].str());
+						xs << xml::EndTag(docbookTag);
+						xs << xml::CR();
+					}
 				}
 			} else {
 				LYXERR0("The BibTeX field " << match[1].str() << " is unknown.");
-				xs << XMLStream::ESCAPE_NONE << from_utf8("<!-- Output Error: The BibTeX field " + match[1].str() + " is unknown -->\n");
+				xs << XMLStream::ESCAPE_NONE <<
+					from_utf8("<!-- Output Error: The BibTeX field " + match[1].str() + " is unknown -->\n");
 			}
 		}
 
@@ -1341,15 +1351,50 @@ void InsetBibtex::docbook(XMLStream & xs, OutputParams const &) const
 			// <biblioset>
 			// Example: http://tdg.docbook.org/tdg/5.1/biblioset.html
 			for (auto const & id: relations) {
+				std::string keptJournal;
+				std::string keptBook;
+
 				if (hasTag(id.first)) {
-					xs << xml::StartTag("biblioset", "relation=\"" + id.second + "\"");
-					xs << xml::CR();
-					xs << xml::StartTag("title");
-					xs << getTag(id.first);
-					xs << xml::EndTag("title");
-					xs << xml::CR();
-					xs << xml::EndTag("biblioset");
-					xs << xml::CR();
+					bool outputThisTag = true;
+
+					// Deal with duplicate entries for the same semantics.
+					if (id.first == "journal" || id.first == "journaltitle" || id.first == "fulljournaltitle") {
+						if (!keptJournal.empty()) {
+							xs << XMLStream::ESCAPE_NONE <<
+							        from_utf8("<!-- Several journal tags in the reference. Kept journal entry: ") +
+									from_utf8(keptJournal) + from_utf8(". Other journal tag: ") +
+								    from_utf8(id.first) + from_utf8(". Corresponding value: ") +
+								    getTag(id.first) + from_utf8(" -->\n");
+							outputThisTag = false;
+						} else {
+							keptJournal = id.first;
+						}
+					} else if (id.first == "booktitle" || id.first == "fullbooktitle") {
+						if (!keptBook.empty()) {
+							xs << XMLStream::ESCAPE_NONE <<
+							   from_utf8("<!-- Several book tags in the reference. Kept book entry: ") +
+							   from_utf8(keptBook) + from_utf8(". Other book tag: ") +
+							   from_utf8(id.first) + from_utf8(". Corresponding value: ") +
+							   getTag(id.first) + from_utf8(" -->\n");
+							outputThisTag = false;
+						} else {
+							keptBook = id.first;
+						}
+					}
+
+					// Output this tag only if it is not a duplicate of a previously output tag.
+					if (outputThisTag) {
+						xs << xml::StartTag("biblioset", "relation=\"" + id.second + "\"");
+						xs << xml::CR();
+						xs << xml::StartTag("title");
+						xs << getTag(id.first);
+						xs << xml::EndTag("title");
+						xs << xml::CR();
+						xs << xml::EndTag("biblioset");
+						xs << xml::CR();
+					}
+
+					// In all cases, erase this tag: it has been dealt with.
 					eraseTag(id.first);
 				}
 			}
@@ -1383,6 +1428,7 @@ void InsetBibtex::docbook(XMLStream & xs, OutputParams const &) const
 				if (hasTag("editor") && hasTag("fullbynames:editor")) {
 					xs << XMLStream::ESCAPE_NONE <<
 							from_utf8("<!-- Several editor tags in the reference. Other editor tag: ") +
+							from_utf8("fullbynames:editor. Corresponding value: ") +
 							getTag("fullbynames:editor") + from_utf8(" -->\n");
 				}
 


More information about the lyx-cvs mailing list