[LyX/master] ctests: supported-languages Fix inputenc Error: Invalid UTF-8 byte "A0".

Günter Milde milde at lyx.org
Mon Dec 30 16:34:31 UTC 2019


commit 8f9dfd2edf741f7c63859cacbecf4d9bc32f679e
Author: Günter Milde <milde at lyx.org>
Date:   Mon Dec 30 17:48:20 2019 +0100

    ctests: supported-languages Fix inputenc Error: Invalid UTF-8 byte "A0".
    
    This happens with "inputenc: auto-legacy" if a language with default
    encoding "utf8" (e.g. Turkmen or Mongolian) is used in a Quote
    (or another environment).
---
 .../export/latex/languages/supported-languages.lyx |  522 ++++++++++++++++++--
 .../supported-languages_babel_auto-legacy.lyx      |   37 ++-
 development/autotests/ignoredTests                 |    1 +
 development/autotests/invertedTests                |    8 +
 4 files changed, 520 insertions(+), 48 deletions(-)

diff --git a/autotests/export/latex/languages/supported-languages.lyx b/autotests/export/latex/languages/supported-languages.lyx
index 7813d13..6a78320 100644
--- a/autotests/export/latex/languages/supported-languages.lyx
+++ b/autotests/export/latex/languages/supported-languages.lyx
@@ -1828,6 +1828,21 @@ Dummy paragraph to set language inside quote
 \end_layout
 
 \begin_layout Standard
+
+\lang czech
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+Dummy paragraph to reset input encoding
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
 \begin_inset Branch debug
 inverted 0
 status collapsed
@@ -1962,29 +1977,6 @@ F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
 
 \end_layout
 
-\begin_layout Standard
-
-\lang czech
-\begin_inset Note Note
-status open
-
-\begin_layout Plain Layout
-Czech dummy paragraph to reset input-encoding with 
-\begin_inset Quotes eld
-\end_inset
-
-auto-legacy
-\begin_inset Quotes erd
-\end_inset
-
-.
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
 \begin_layout Labeling
 \labelwidthstring 00.00.0000
 Babel farsi
@@ -3082,29 +3074,6 @@ magyar nyelv
  a komi, a mari és a mordvin nyelvek.
 \end_layout
 
-\begin_layout Standard
-
-\lang czech
-\begin_inset Note Note
-status open
-
-\begin_layout Plain Layout
-Czech dummy paragraph to reset input-encoding with 
-\begin_inset Quotes eld
-\end_inset
-
-auto-legacy
-\begin_inset Quotes erd
-\end_inset
-
-.
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
 \begin_layout Labeling
 \labelwidthstring 00.00.0000
 Babel magyar
@@ -3852,6 +3821,20 @@ Kurmancî an kurmanciya jorîn yek ji zaravayên zimanê kurdî ye.
 \end_layout
 
 \begin_layout Standard
+
+\lang czech
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+Dummy paragraph to reset input encoding with inputenc=auto-legacy
+\end_layout
+
+\end_inset
+
+
+\lang english
+
 \begin_inset Branch debug
 inverted 0
 status collapsed
@@ -5087,6 +5070,155 @@ status open
 
 \end_layout
 
+\begin_layout Standard
+
+\lang czech
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+Dummy paragraph to reset input encoding with inputenc=auto-legacy
+\end_layout
+
+\end_inset
+
+
+\lang english
+
+\begin_inset Branch debug
+inverted 0
+status collapsed
+
+\begin_layout Standard
+The language is 
+\begin_inset Quotes eld
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+languagename
+\end_layout
+
+\end_inset
+
+
+\begin_inset Quotes erd
+\end_inset
+
+, the input encoding 
+\begin_inset Quotes eld
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+inputencodingname
+\end_layout
+
+\end_inset
+
+
+\begin_inset Quotes erd
+\end_inset
+
+ and the font encoding 
+\begin_inset Quotes eld
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+makeatletter
+\backslash
+f@encoding
+\backslash
+makeatother
+\end_layout
+
+\end_inset
+
+
+\begin_inset Quotes erd
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+A0   ¡ ¢ £ ¤ ¥ ¦ § 
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+¨
+\end_layout
+
+\end_inset
+
+ © ª « ¬ ® 
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+¯
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+B0 ° ± ² ³ 
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+´
+\end_layout
+
+\end_inset
+
+ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
+\end_layout
+
+\begin_layout Standard
+C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
+\end_layout
+
+\begin_layout Standard
+D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
+\end_layout
+
+\begin_layout Standard
+E0 à á â ã ä å æ ç è é ê ë ì í î ï
+\end_layout
+
+\begin_layout Standard
+F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Labeling
 \labelwidthstring 00.00.0000
 LyX serbian
@@ -5901,6 +6033,155 @@ Türkmen dili
  Türkmen dili - türki dilleriň oguz dilleri toparyna degişlidir.
 \end_layout
 
+\begin_layout Standard
+
+\lang czech
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+Dummy paragraph to reset input encoding with inputenc=auto-legacy
+\end_layout
+
+\end_inset
+
+
+\lang english
+
+\begin_inset Branch debug
+inverted 0
+status collapsed
+
+\begin_layout Standard
+The language is 
+\begin_inset Quotes eld
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+languagename
+\end_layout
+
+\end_inset
+
+
+\begin_inset Quotes erd
+\end_inset
+
+, the input encoding 
+\begin_inset Quotes eld
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+inputencodingname
+\end_layout
+
+\end_inset
+
+
+\begin_inset Quotes erd
+\end_inset
+
+ and the font encoding 
+\begin_inset Quotes eld
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+makeatletter
+\backslash
+f@encoding
+\backslash
+makeatother
+\end_layout
+
+\end_inset
+
+
+\begin_inset Quotes erd
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+A0   ¡ ¢ £ ¤ ¥ ¦ § 
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+¨
+\end_layout
+
+\end_inset
+
+ © ª « ¬ ® 
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+¯
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+B0 ° ± ² ³ 
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+´
+\end_layout
+
+\end_inset
+
+ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
+\end_layout
+
+\begin_layout Standard
+C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
+\end_layout
+
+\begin_layout Standard
+D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
+\end_layout
+
+\begin_layout Standard
+E0 à á â ã ä å æ ç è é ê ë ì í î ï
+\end_layout
+
+\begin_layout Standard
+F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Labeling
 \labelwidthstring 00.00.0000
 Babel turkmen
@@ -6045,6 +6326,155 @@ Việt ngữ
 
 \end_layout
 
+\begin_layout Standard
+
+\lang czech
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+Dummy paragraph to reset input encoding with inputenc=auto-legacy
+\end_layout
+
+\end_inset
+
+
+\lang english
+
+\begin_inset Branch debug
+inverted 0
+status collapsed
+
+\begin_layout Standard
+The language is 
+\begin_inset Quotes eld
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+languagename
+\end_layout
+
+\end_inset
+
+
+\begin_inset Quotes erd
+\end_inset
+
+, the input encoding 
+\begin_inset Quotes eld
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+inputencodingname
+\end_layout
+
+\end_inset
+
+
+\begin_inset Quotes erd
+\end_inset
+
+ and the font encoding 
+\begin_inset Quotes eld
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+makeatletter
+\backslash
+f@encoding
+\backslash
+makeatother
+\end_layout
+
+\end_inset
+
+
+\begin_inset Quotes erd
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+A0   ¡ ¢ £ ¤ ¥ ¦ § 
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+¨
+\end_layout
+
+\end_inset
+
+ © ª « ¬ ® 
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+¯
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+B0 ° ± ² ³ 
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+´
+\end_layout
+
+\end_inset
+
+ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
+\end_layout
+
+\begin_layout Standard
+C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
+\end_layout
+
+\begin_layout Standard
+D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
+\end_layout
+
+\begin_layout Standard
+E0 à á â ã ä å æ ç è é ê ë ì í î ï
+\end_layout
+
+\begin_layout Standard
+F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Labeling
 \labelwidthstring 00.00.0000
 Babel vietnamese
diff --git a/autotests/export/latex/languages/supported-languages_babel_auto-legacy.lyx b/autotests/export/latex/languages/supported-languages_babel_auto-legacy.lyx
index fdda28a..107fb04 100644
--- a/autotests/export/latex/languages/supported-languages_babel_auto-legacy.lyx
+++ b/autotests/export/latex/languages/supported-languages_babel_auto-legacy.lyx
@@ -169,9 +169,42 @@ auto-legacy
 
 , i.e.
  each language uses its own default legacy input encoding.
+\end_layout
 
-\emph on
- 
+\begin_layout Quote
+
+\series bold
+Bug
+\series default
+ (in LyX or LaTeX?): If a language change happens in an environment (e.g.
+ a Quote), the input encoding 
+\begin_inset Quotes eld
+\end_inset
+
+utf8
+\begin_inset Quotes erd
+\end_inset
+
+ is not properly switched back after leaving the environment: the 
+\backslash
+inputencodingname is back to the encoding used before, but characters in
+ the 
+\begin_inset Quotes eld
+\end_inset
+
+high-bit
+\begin_inset Quotes erd
+\end_inset
+
+ range produce utf8-errors (see 
+\begin_inset Quotes eld
+\end_inset
+
+nested-inputenc_auto-legacy.lyx
+\begin_inset Quotes erd
+\end_inset
+
+).
 \end_layout
 
 \begin_layout Standard
diff --git a/development/autotests/ignoredTests b/development/autotests/ignoredTests
index 3061a1d..7f8b844 100644
--- a/development/autotests/ignoredTests
+++ b/development/autotests/ignoredTests
@@ -179,6 +179,7 @@ export/export/latex/inputenc-.*_systemF # 11522 still open but already tested
 export/export/latex/languages/supported-languages_babel_(pdf|dvi)._systemF
 export/export/latex/languages/supported-languages_babel_auto-legacy_.*_systemF
 export/export/latex/lyxbugs/6197-polish-amssymb_pdf5_systemF
+export/export/latex/languages/nested-inputenc_auto-legacy_.*_systemF
 # wrong output but does not fail:
 export/export/latex/lyxbugs/3059-language-in-tables_.*_systemF
 
diff --git a/development/autotests/invertedTests b/development/autotests/invertedTests
index d94486f..5c54fdb 100644
--- a/development/autotests/invertedTests
+++ b/development/autotests/invertedTests
@@ -84,6 +84,14 @@ export/examples/ja/Graphics_and_Insets/XY-Pic.*_systemF
 export/doc/he/.*pdf5_systemF
 export/.*/fa/Welcome_(dvi3|pdf5)_systemF
 
+Bug (in LyX or LaTeX?): 
+If a language change happens in an environment, LyX does not write an
+explicit \inputencoding{<new-default>} when switching back to the "outer"
+language. However, without explicit \inputencoding, the input encoding
+“utf8” is not properly switched back and high-bit characters like "ä" produce
+an "inputenc Error: Invalid UTF-8 byte "A0"".
+export/export/latex/languages/nested-inputenc_auto-legacy_pdf2
+
 
 # ================================================
 Sublabel: lyxbugs


More information about the lyx-cvs mailing list