[LyX/master] DocBook: avoid generating empty paragraphs instead of new pages.

Thibaut Cuvelier tcuvelier at lyx.org
Sun Aug 2 01:38:48 UTC 2020


commit 875f7d42e2eb6b9b4cef3c29213b12f85f9756bb
Author: Thibaut Cuvelier <tcuvelier at lyx.org>
Date:   Sun Aug 2 04:03:17 2020 +0200

    DocBook: avoid generating empty paragraphs instead of new pages.
    
    Since this requires generating the paragraph first and only then outputting it if necessary, tests like XMLStream::isTagOpen no longer worked properly. This commit also refactors table handling to get rid of that case (and to make the code easier to read).
---
 .../docbook/bibliography_precooked_aastex.xml      |    5 -
 src/OutputParams.cpp                               |    2 +-
 src/OutputParams.h                                 |    3 +
 src/insets/InsetFloat.cpp                          |  177 +++++++++++---------
 src/insets/InsetTabular.cpp                        |    6 +-
 src/output_docbook.cpp                             |   24 ++-
 6 files changed, 123 insertions(+), 94 deletions(-)

diff --git a/autotests/export/docbook/bibliography_precooked_aastex.xml b/autotests/export/docbook/bibliography_precooked_aastex.xml
index bc10168..89b1714 100644
--- a/autotests/export/docbook/bibliography_precooked_aastex.xml
+++ b/autotests/export/docbook/bibliography_precooked_aastex.xml
@@ -967,11 +967,8 @@ v(p,\lambda)_{\pm} & = & \pm\lambda(E\mp\lambda|{\textbf{p}}|)^{1/2}\chi
 <bibliomixed xml:id='pet76'>Peterson, C. J. 1976, <!-- \aj -->, 81, 617 </bibliomixed>
 <bibliomixed xml:id='spi85'>Spitzer, L. 1985, Dynamics of Star Clusters, J. Goodman and P. Hut, Dordrecht: Reidel, 109 </bibliomixed>
 </bibliography>
-<para>
-</para>
 <table xml:id="tbl-2">
 <caption>Terribly relevant tabular information.</caption>
-
 <tbody>
 <tr>
 <td align='center' valign='top'>Star </td>
@@ -1209,8 +1206,6 @@ v(p,\lambda)_{\pm} & = & \pm\lambda(E\mp\lambda|{\textbf{p}}|)^{1/2}\chi
 <TableComments>We can also attach a long-ish paragraph of explanatory material to a table. Use \tablerefs to append a list of references. The following references were from a different table: I've patched them in here to show how they look, but don't take them too seriously—I certainly have not.</TableComments>
 <TableRefs>(1) Barbuy, Spite, & Spite 1985; (2) Bond 1980; (3) Carbon et al. 1987; (4) Hobbs & Duncan 1987; (5) Gilroy et al. 1988: (6) Gratton & Ortolani 1986; (7) Gratton & Sneden 1987; (8) Gratton & Sneden (1988); (9) Gratton & Sneden 1991; (10) Kraft et al. 1982; (11) LCL, or Laird, 1990; (12) Leep & Wallerstein 1981; (13) Luck & Bond 1981; (14) Luck & Bond 1985; (15) Magain 1987; (16) Magain 1989; (17) Peterson 1981; (18) Peterson, Kurucz, & Carney 1990; (19) RMB; (20) Schuster & Nissen 1988; (21) Schuster & Nissen 1989b; (22) Spite et al. 1984; (23) Spite & Spite 1986; (24) Hobbs & Thorburn 1991; (25) Hobbs et al. 1991; (26) Olsen 1983.</TableRefs>
 </table>
-
-
 </section>
 
 </article>
\ No newline at end of file
diff --git a/src/OutputParams.cpp b/src/OutputParams.cpp
index 465282c..a7e0fd5 100644
--- a/src/OutputParams.cpp
+++ b/src/OutputParams.cpp
@@ -35,7 +35,7 @@ OutputParams::OutputParams(Encoding const * enc)
 	  html_disable_captions(false), html_in_par(false),
 	  html_make_pars(true), docbook_in_par(false), docbook_make_pars(true),
 	  docbook_force_pars(false), docbook_anchors_to_ignore(std::set<docstring>()), docbook_in_float(false),
-	  docbook_in_listing(false), for_toc(false), for_tooltip(false),
+	  docbook_in_listing(false), docbook_in_table(false), for_toc(false), for_tooltip(false),
 	  for_search(false), for_preview(false), includeall(false)
 {
 	// Note: in PreviewLoader::Impl::dumpPreamble
diff --git a/src/OutputParams.h b/src/OutputParams.h
index c2e85dc..31a7059 100644
--- a/src/OutputParams.h
+++ b/src/OutputParams.h
@@ -369,6 +369,9 @@ public:
 	/// Is the current context a listing?
 	bool docbook_in_listing;
 
+	/// Is the current context a table?
+	bool docbook_in_table;
+
 	/// Are we generating this material for inclusion in a TOC-like entity?
 	bool for_toc;
 
diff --git a/src/insets/InsetFloat.cpp b/src/insets/InsetFloat.cpp
index 1cd614e..b0a48f2 100644
--- a/src/insets/InsetFloat.cpp
+++ b/src/insets/InsetFloat.cpp
@@ -563,32 +563,71 @@ const InsetCaption* findCaptionInParagraph(const Paragraph &par)
 }
 
 
-void InsetFloat::docbook(XMLStream & xs, OutputParams const & runparams) const
+void docbookSubfigures(XMLStream & xs, OutputParams const & runparams, const InsetCaption * caption,
+					   const InsetLabel * label, std::vector<const InsetBox *> & subfigures)
 {
-	// Determine whether the float has a title or not. For this, iterate through the paragraphs and look
-	// for an InsetCaption. Do the same for labels and subfigures.
-	// The caption and the label for each subfigure is handled by recursive calls.
-	const InsetCaption* caption = nullptr;
-	const InsetLabel* label = nullptr;
-	std::vector<const InsetBox *> subfigures;
+	// Ensure there is no label output, it is supposed to be handled as xml:id.
+	OutputParams rpNoLabel = runparams;
+	if (label)
+		rpNoLabel.docbook_anchors_to_ignore.emplace(label->screenLabel());
 
-	auto end = paragraphs().end();
-	for (auto it = paragraphs().begin(); it != end; ++it) {
-		std::vector<const InsetBox *> foundSubfigures = findSubfiguresInParagraph(*it);
-		if (!foundSubfigures.empty()) {
-			subfigures.reserve(subfigures.size() + foundSubfigures.size());
-			subfigures.insert(subfigures.end(), foundSubfigures.begin(), foundSubfigures.end());
+	// First, open the formal group.
+	docstring attr = docstring();
+	if (label)
+		attr += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
+
+	xs.startDivision(false);
+	xs << xml::StartTag("formalgroup", attr);
+	xs << xml::CR();
+
+	xs << xml::StartTag("title", attr);
+	if (caption) {
+		caption->getCaptionAsDocBook(xs, rpNoLabel);
+	} else {
+		xs << "No caption";
+		// No caption has been detected, but this tag is required for the document to be valid DocBook.
+	}
+	xs << xml::EndTag("title");
+	xs << xml::CR();
+
+	// Deal with each subfigure individually. This should also deal with their caption and their label.
+	// This should be a recursive call to InsetFloat.
+	for (const InsetBox *subfigure: subfigures) {
+		// If there is no InsetFloat in the paragraphs, output a warning.
+		bool foundInsetFloat = false;
+		for (const auto & it : subfigure->paragraphs()) {
+			for (pos_type posIn = 0; posIn < it.size(); ++posIn) {
+				const Inset *inset = it.getInset(posIn);
+				if (inset && dynamic_cast<const InsetFloat*>(inset)) {
+					foundInsetFloat = true;
+					break;
+				}
+			}
+
+			if (foundInsetFloat)
+				break;
 		}
 
-		if (!caption)
-			caption = findCaptionInParagraph(*it);
-		if (!label)
-			label = findLabelInParagraph(*it);
+		if (!foundInsetFloat)
+			xs << XMLStream::ESCAPE_NONE << "Error: no float found in the box. "
+								"To use subfigures in DocBook, elements must be wrapped in a float "
+			                    "inset and have a title/caption.";
+		// TODO: could also output a table, that would ensure that the document is correct and *displays* correctly (but without the right semantics), instead of just an error.
+
+		// Finally, recurse.
+		subfigure->docbook(xs, runparams);
 	}
 
-	// Gather a few things from global environment that are shared between all following cases.
-	FloatList const &floats = buffer().params().documentClass().floats();
-	Floating const &ftype = floats.getType(params_.type);
+	// Every subfigure is done: close the formal group.
+	xs << xml::EndTag("formalgroup");
+	xs << xml::CR();
+	xs.endDivision();
+}
+
+
+void docbookNoSubfigures(XMLStream & xs, OutputParams const & runparams, const InsetCaption * caption,
+                         const InsetLabel * label, Floating const & ftype, const InsetFloat * thisFloat)
+{
 	string const &titleTag = ftype.docbookCaption();
 
 	// Ensure there is no label output, it is supposed to be handled as xml:id.
@@ -600,65 +639,10 @@ void InsetFloat::docbook(XMLStream & xs, OutputParams const & runparams) const
 	// captions, they cannot appear at the end of the float, albeit LyX is happy with that).
 	OutputParams rpNoTitle = runparams;
 	rpNoTitle.docbook_in_float = true;
+	if (ftype.floattype() == "table")
+		rpNoTitle.docbook_in_table = true;
 
-	// Deal with subfigures.
-	if (!subfigures.empty()) {
-		// First, open the formal group.
-		docstring attr = docstring();
-		if (label)
-			attr += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
-
-		xs.startDivision(false);
-		xs << xml::StartTag("formalgroup", attr);
-		xs << xml::CR();
-
-		xs << xml::StartTag("title", attr);
-		if (caption) {
-			caption->getCaptionAsDocBook(xs, rpNoLabel);
-		} else {
-			xs << "No caption";
-			// No caption has been detected, but this tag is required for the document to be valid DocBook.
-		}
-		xs << xml::EndTag("title");
-		xs << xml::CR();
-
-		// Deal with each subfigure individually. This should also deal with their caption and their label.
-		// This should be a recursive call to InsetFloat.
-		for (const InsetBox *subfigure: subfigures) {
-			// If there is no InsetFloat in the paragraphs, output a warning.
-			bool foundInsetFloat = false;
-			for (auto it = subfigure->paragraphs().begin(); it != subfigure->paragraphs().end(); ++it) {
-				for (pos_type posIn = 0; posIn < it->size(); ++posIn) {
-					const Inset *inset = it->getInset(posIn);
-					if (inset && dynamic_cast<const InsetFloat*>(inset)) {
-						foundInsetFloat = true;
-						break;
-					}
-				}
-
-				if (foundInsetFloat)
-					break;
-			}
-
-			if (!foundInsetFloat)
-				xs << XMLStream::ESCAPE_NONE << "Error: no float found in the box. "
-									"To use subfigures in DocBook, elements must be wrapped in a float "
-				                    "inset and have a title/caption.";
-			// TODO: could also output a table, that would ensure that the document is correct and *displays* correctly (but without the right semantics), instead of just an error.
-
-			// Finally, recurse.
-			subfigure->docbook(xs, runparams);
-		}
-
-		// Every subfigure is done: close the formal group.
-		xs << xml::EndTag("formalgroup");
-		xs << xml::CR();
-		xs.endDivision();
-	}
-
-	// Here, ensured not to have subfigures.
-
-	// Organisation: <float> <title if any/> <contents without title/> </float>
+	// Organisation: <float> <title if any/> <contents without title/> </float>.
 	docstring attr = docstring();
 	if (label)
 		attr += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
@@ -676,12 +660,47 @@ void InsetFloat::docbook(XMLStream & xs, OutputParams const & runparams) const
 		xs << xml::EndTag(titleTag);
 		xs << xml::CR();
 	}
-	InsetText::docbook(xs, rpNoTitle);
+	thisFloat->InsetText::docbook(xs, rpNoTitle);
 	xs << xml::EndTag(ftype.docbookTag(caption != nullptr));
 	xs << xml::CR();
 }
 
 
+void InsetFloat::docbook(XMLStream & xs, OutputParams const & runparams) const
+{
+	// Determine whether the float has a title or not. For this, iterate through the paragraphs and look
+	// for an InsetCaption. Do the same for labels and subfigures.
+	// The caption and the label for each subfigure is handled by recursive calls.
+	const InsetCaption* caption = nullptr;
+	const InsetLabel* label = nullptr;
+	std::vector<const InsetBox *> subfigures;
+
+	auto end = paragraphs().end();
+	for (auto it = paragraphs().begin(); it != end; ++it) {
+		std::vector<const InsetBox *> foundSubfigures = findSubfiguresInParagraph(*it);
+		if (!foundSubfigures.empty()) {
+			subfigures.reserve(subfigures.size() + foundSubfigures.size());
+			subfigures.insert(subfigures.end(), foundSubfigures.begin(), foundSubfigures.end());
+		}
+
+		if (!caption)
+			caption = findCaptionInParagraph(*it);
+		if (!label)
+			label = findLabelInParagraph(*it);
+	}
+
+	// Gather a few things from global environment that are shared between all following cases.
+	FloatList const &floats = buffer().params().documentClass().floats();
+	Floating const &ftype = floats.getType(params_.type);
+
+	// Switch on subfigures.
+	if (!subfigures.empty())
+		docbookSubfigures(xs, runparams, caption, label, subfigures);
+	else
+		docbookNoSubfigures(xs, runparams, caption, label, ftype, this);
+}
+
+
 bool InsetFloat::insetAllowed(InsetCode code) const
 {
 	// The case that code == FLOAT_CODE is handled in Text3.cpp,
diff --git a/src/insets/InsetTabular.cpp b/src/insets/InsetTabular.cpp
index 87988af..b449ab3 100644
--- a/src/insets/InsetTabular.cpp
+++ b/src/insets/InsetTabular.cpp
@@ -3658,8 +3658,8 @@ void Tabular::docbook(XMLStream & xs, OutputParams const & runparams) const
 	docstring ret;
 
 	// Some tables are inline. Likely limitation: cannot output a table within a table; is that really a limitation?
-	bool hasTableStarted = xs.isTagOpen(xml::StartTag("informaltable")) || xs.isTagOpen(xml::StartTag("table"));
-	if (!hasTableStarted) {
+	if (!runparams.docbook_in_table) { // Check on the *outer* set of parameters, so that the table can be closed
+		// properly at the end of this function.
 		xs << xml::StartTag("informaltable");
 		xs << xml::CR();
 	}
@@ -3742,7 +3742,7 @@ void Tabular::docbook(XMLStream & xs, OutputParams const & runparams) const
 	xs << xml::CR();
 
 	// If this method started the table tag, also make it close it.
-	if (!hasTableStarted) {
+	if (!runparams.docbook_in_table) {
 		xs << xml::EndTag("informaltable");
 		xs << xml::CR();
 	}
diff --git a/src/output_docbook.cpp b/src/output_docbook.cpp
index 94aac87..5e15edc 100644
--- a/src/output_docbook.cpp
+++ b/src/output_docbook.cpp
@@ -463,14 +463,26 @@ ParagraphList::const_iterator makeParagraphs(
 				((open_par && (!runparams.docbook_in_par || nextpar != pend))
 				|| (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
 
-		if (open_par)
-			openParTag(xs, lay);
+		// Determine if this paragraph has some real content. Things like new pages are not caught
+		// by Paragraph::empty(), even though they do not generate anything useful in DocBook.
+		odocstringstream os2;
+		XMLStream xs2(os2);
+		par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
 
-		par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
+		docstring cleaned = os2.str();
+		static const lyx::regex reg("[ \\r\\n]*");
+		cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
 
-		if (close_par) {
-			closeTag(xs, lay);
-			xs << xml::CR();
+		if (!cleaned.empty()) {
+			if (open_par)
+				openParTag(xs, lay);
+
+			xs << XMLStream::ESCAPE_NONE << os2.str();
+
+			if (close_par) {
+				closeTag(xs, lay);
+				xs << xml::CR();
+			}
 		}
 	}
 	return pend;


More information about the lyx-cvs mailing list