475 lines
55 KiB
HTML
475 lines
55 KiB
HTML
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||
|
<html lang="en-gb">
|
||
|
<head>
|
||
|
<title>4/tf</title>
|
||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||
|
<meta http-equiv="Content-Language" content="en-gb">
|
||
|
<link href="inweb.css" rel="stylesheet" type="text/css">
|
||
|
</head>
|
||
|
<body>
|
||
|
|
||
|
<!--Weave of '4/tf' generated by inweb 6P91-->
|
||
|
<ul class="crumbs"><li><a href="../webs.html">★</a></li><li><a href="index.html">foundation</a></li><li><a href="index.html#4">Chapter 4: Text Handling</a></li><li><b>Text Files</b></li></ul><p class="purpose">To read text files of whatever flavour, one line at a time.</p>
|
||
|
|
||
|
<ul class="toc"><li><a href="#SP1">§1. Text files</a></li><li><a href="#SP2">§2. Text file positions</a></li><li><a href="#SP5">§5. Text file scanner</a></li><li><a href="#SP8">§8. Reading UTF-8 files</a></li></ul><hr class="tocbar">
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP1"></a><b>§1. Text files. </b>Foundation was written mainly to support command-line tools which, of their
|
||
|
nature, deal with a lot of text files: source code of programs, configuration
|
||
|
files, HTML, XML and so on. The main aim of this section is to provide a
|
||
|
standard way to read in and iterate through lines of a text file.
|
||
|
</p>
|
||
|
|
||
|
<p class="inwebparagraph">First, though, here is a perhaps clumsy but effective way to test if a
|
||
|
file actually exists on disc at a given filename:
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<pre class="display">
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">TextFiles::exists</span><span class="plain">(</span><span class="reserved">filename</span><span class="plain"> *</span><span class="identifier">F</span><span class="plain">) {</span>
|
||
|
<span class="reserved">FILE</span><span class="plain"> *</span><span class="identifier">HANDLE</span><span class="plain"> = </span><span class="functiontext">Filenames::fopen</span><span class="plain">(</span><span class="identifier">F</span><span class="plain">, </span><span class="string">"rb"</span><span class="plain">);</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">HANDLE</span><span class="plain"> == </span><span class="identifier">NULL</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
|
||
|
<span class="identifier">fclose</span><span class="plain">(</span><span class="identifier">HANDLE</span><span class="plain">);</span>
|
||
|
<span class="reserved">return</span><span class="plain"> </span><span class="constant">TRUE</span><span class="plain">;</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">The function TextFiles::exists appears nowhere else.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP2"></a><b>§2. Text file positions. </b>Here's how we record a position in a text file:
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<pre class="display">
|
||
|
<span class="reserved">typedef</span><span class="plain"> </span><span class="reserved">struct</span><span class="plain"> </span><span class="reserved">text_file_position</span><span class="plain"> {</span>
|
||
|
<span class="reserved">struct</span><span class="plain"> </span><span class="reserved">filename</span><span class="plain"> *</span><span class="identifier">text_file_filename</span><span class="plain">;</span>
|
||
|
<span class="reserved">FILE</span><span class="plain"> *</span><span class="identifier">handle_when_open</span><span class="plain">;</span>
|
||
|
<span class="reserved">struct</span><span class="plain"> </span><span class="reserved">unicode_file_buffer</span><span class="plain"> </span><span class="identifier">ufb</span><span class="plain">;</span>
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">line_count</span><span class="plain">; </span> <span class="comment">counting from 1</span>
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">line_position</span><span class="plain">;</span>
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">skip_terminator</span><span class="plain">;</span>
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">actively_scanning</span><span class="plain">; </span> <span class="comment">whether we are still interested in the rest of the file</span>
|
||
|
<span class="plain">} </span><span class="reserved">text_file_position</span><span class="plain">;</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">The structure text_file_position is accessed in 3/em and here.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP3"></a><b>§3. </b>For access:
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<pre class="display">
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">TextFiles::get_line_count</span><span class="plain">(</span><span class="reserved">text_file_position</span><span class="plain"> *</span><span class="identifier">tfp</span><span class="plain">) {</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">tfp</span><span class="plain"> == </span><span class="identifier">NULL</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> 0;</span>
|
||
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>line_count</span><span class="plain">;</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">The function TextFiles::get_line_count appears nowhere else.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP4"></a><b>§4. </b>And this is for a real nowhere man:
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<pre class="display">
|
||
|
<span class="reserved">text_file_position</span><span class="plain"> </span><span class="functiontext">TextFiles::nowhere</span><span class="plain">(</span><span class="reserved">void</span><span class="plain">) {</span>
|
||
|
<span class="reserved">text_file_position</span><span class="plain"> </span><span class="identifier">tfp</span><span class="plain">;</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.text_file_filename</span><span class="plain"> = </span><span class="identifier">NULL</span><span class="plain">;</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.line_count</span><span class="plain"> = 0;</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.line_position</span><span class="plain"> = 0;</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.skip_terminator</span><span class="plain"> = </span><span class="constant">FALSE</span><span class="plain">;</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.actively_scanning</span><span class="plain"> = </span><span class="constant">FALSE</span><span class="plain">;</span>
|
||
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">tfp</span><span class="plain">;</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">The function TextFiles::nowhere appears nowhere else.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP5"></a><b>§5. Text file scanner. </b>We read lines in, delimited by any of the standard line-ending characters,
|
||
|
and send them one at a time to a function called <code class="display"><span class="extract">iterator</span></code>. Throughout,
|
||
|
we preserve a pointer called <code class="display"><span class="extract">state</span></code> to some object being used by the
|
||
|
client.
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<pre class="display">
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">TextFiles::read</span><span class="plain">(</span><span class="reserved">filename</span><span class="plain"> *</span><span class="identifier">F</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">escape_oddities</span><span class="plain">, </span><span class="reserved">char</span><span class="plain"> *</span><span class="identifier">message</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">serious</span><span class="plain">,</span>
|
||
|
<span class="reserved">void</span><span class="plain"> (</span><span class="identifier">iterator</span><span class="plain">)(</span><span class="reserved">text_stream</span><span class="plain"> *, </span><span class="reserved">text_file_position</span><span class="plain"> *, </span><span class="reserved">void</span><span class="plain"> *),</span>
|
||
|
<span class="reserved">text_file_position</span><span class="plain"> *</span><span class="identifier">start_at</span><span class="plain">, </span><span class="reserved">void</span><span class="plain"> *</span><span class="identifier">state</span><span class="plain">) {</span>
|
||
|
<span class="reserved">text_file_position</span><span class="plain"> </span><span class="identifier">tfp</span><span class="plain">;</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.ufb</span><span class="plain"> = </span><span class="functiontext">TextFiles::create_ufb</span><span class="plain">();</span>
|
||
|
<<span class="cwebmacro">Open the text file</span> <span class="cwebmacronumber">5.1</span>><span class="plain">;</span>
|
||
|
<<span class="cwebmacro">Set the initial position, seeking it in the file if need be</span> <span class="cwebmacronumber">5.2</span>><span class="plain">;</span>
|
||
|
<<span class="cwebmacro">Read in lines and send them one by one to the iterator</span> <span class="cwebmacronumber">5.3</span>><span class="plain">;</span>
|
||
|
<span class="identifier">fclose</span><span class="plain">(</span><span class="identifier">tfp</span><span class="element">.handle_when_open</span><span class="plain">);</span>
|
||
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">tfp</span><span class="element">.line_count</span><span class="plain">;</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">The function TextFiles::read is used in 5/htm (<a href="5-htm.html#SP10">§10</a>).</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP5_1"></a><b>§5.1. </b><code class="display">
|
||
|
<<span class="cwebmacrodefn">Open the text file</span> <span class="cwebmacronumber">5.1</span>> =
|
||
|
</code></p>
|
||
|
|
||
|
|
||
|
<pre class="displaydefn">
|
||
|
<span class="identifier">tfp</span><span class="element">.handle_when_open</span><span class="plain"> = </span><span class="functiontext">Filenames::fopen</span><span class="plain">(</span><span class="identifier">F</span><span class="plain">, </span><span class="string">"rb"</span><span class="plain">);</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">tfp</span><span class="element">.handle_when_open</span><span class="plain"> == </span><span class="identifier">NULL</span><span class="plain">) {</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">message</span><span class="plain"> == </span><span class="identifier">NULL</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> 0;</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">serious</span><span class="plain">) </span><span class="functiontext">Errors::fatal_with_file</span><span class="plain">(</span><span class="identifier">message</span><span class="plain">, </span><span class="identifier">F</span><span class="plain">);</span>
|
||
|
<span class="reserved">else</span><span class="plain"> { </span><span class="functiontext">Errors::with_file</span><span class="plain">(</span><span class="identifier">message</span><span class="plain">, </span><span class="identifier">F</span><span class="plain">); </span><span class="reserved">return</span><span class="plain"> 0; }</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">This code is used in <a href="#SP5">§5</a>.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP5_2"></a><b>§5.2. </b>The ANSI definition of <code class="display"><span class="extract">ftell</span></code> and <code class="display"><span class="extract">fseek</span></code> says that, with text files, the
|
||
|
only definite position value is 0 — meaning the beginning of the file — and
|
||
|
this is what we initialise <code class="display"><span class="extract">line_position</span></code> to. We must otherwise only write
|
||
|
values returned by <code class="display"><span class="extract">ftell</span></code> into this field.
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<p class="macrodefinition"><code class="display">
|
||
|
<<span class="cwebmacrodefn">Set the initial position, seeking it in the file if need be</span> <span class="cwebmacronumber">5.2</span>> =
|
||
|
</code></p>
|
||
|
|
||
|
|
||
|
<pre class="displaydefn">
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">start_at</span><span class="plain"> == </span><span class="identifier">NULL</span><span class="plain">) {</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.line_count</span><span class="plain"> = 1;</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.line_position</span><span class="plain"> = 0;</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.skip_terminator</span><span class="plain"> = </span><span class="character">'X'</span><span class="plain">;</span>
|
||
|
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> {</span>
|
||
|
<span class="identifier">tfp</span><span class="plain"> = *</span><span class="identifier">start_at</span><span class="plain">;</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">fseek</span><span class="plain">(</span><span class="identifier">tfp</span><span class="element">.handle_when_open</span><span class="plain">, (</span><span class="reserved">long</span><span class="plain"> </span><span class="reserved">int</span><span class="plain">) (</span><span class="identifier">tfp</span><span class="element">.line_position</span><span class="plain">), </span><span class="identifier">SEEK_SET</span><span class="plain">)) {</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">serious</span><span class="plain">) </span><span class="functiontext">Errors::fatal_with_file</span><span class="plain">(</span><span class="string">"unable to seek position in file"</span><span class="plain">, </span><span class="identifier">F</span><span class="plain">);</span>
|
||
|
<span class="functiontext">Errors::with_file</span><span class="plain">(</span><span class="string">"unable to seek position in file"</span><span class="plain">, </span><span class="identifier">F</span><span class="plain">);</span>
|
||
|
<span class="reserved">return</span><span class="plain"> 0;</span>
|
||
|
<span class="plain">}</span>
|
||
|
<span class="plain">}</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.actively_scanning</span><span class="plain"> = </span><span class="constant">TRUE</span><span class="plain">;</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.text_file_filename</span><span class="plain"> = </span><span class="identifier">F</span><span class="plain">;</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">This code is used in <a href="#SP5">§5</a>.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP5_3"></a><b>§5.3. </b>We aim to get this right whether the lines are terminated by <code class="display"><span class="extract">0A</span></code>, <code class="display"><span class="extract">0D</span></code>,
|
||
|
<code class="display"><span class="extract">0A 0D</span></code> or <code class="display"><span class="extract">0D 0A</span></code>. The final line is not required to be terminated.
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<p class="macrodefinition"><code class="display">
|
||
|
<<span class="cwebmacrodefn">Read in lines and send them one by one to the iterator</span> <span class="cwebmacronumber">5.3</span>> =
|
||
|
</code></p>
|
||
|
|
||
|
|
||
|
<pre class="displaydefn">
|
||
|
<span class="identifier">TEMPORARY_TEXT</span><span class="plain">(</span><span class="identifier">line</span><span class="plain">);</span>
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain"> = 0, </span><span class="identifier">c</span><span class="plain"> = </span><span class="character">' '</span><span class="plain">;</span>
|
||
|
<span class="reserved">while</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> != </span><span class="identifier">EOF</span><span class="plain">) && (</span><span class="identifier">tfp</span><span class="element">.actively_scanning</span><span class="plain">)) {</span>
|
||
|
<span class="identifier">c</span><span class="plain"> = </span><span class="functiontext">TextFiles::utf8_fgetc</span><span class="plain">(</span><span class="identifier">tfp</span><span class="element">.handle_when_open</span><span class="plain">, </span><span class="identifier">NULL</span><span class="plain">, </span><span class="identifier">escape_oddities</span><span class="plain">, &</span><span class="identifier">tfp</span><span class="element">.ufb</span><span class="plain">);</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == </span><span class="identifier">EOF</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\</span><span class="plain">x</span><span class="character">0a'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\</span><span class="plain">x</span><span class="character">0d'</span><span class="plain">)) {</span>
|
||
|
<span class="functiontext">Str::put_at</span><span class="plain">(</span><span class="identifier">line</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">, 0);</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">i</span><span class="plain"> > 0) || (</span><span class="identifier">c</span><span class="plain"> != </span><span class="identifier">tfp</span><span class="element">.skip_terminator</span><span class="plain">)) {</span>
|
||
|
<<span class="cwebmacro">Feed the completed line to the iterator routine</span> <span class="cwebmacronumber">5.3.1</span>><span class="plain">;</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\</span><span class="plain">x</span><span class="character">0a'</span><span class="plain">) </span><span class="identifier">tfp</span><span class="element">.skip_terminator</span><span class="plain"> = </span><span class="character">'\</span><span class="plain">x</span><span class="character">0d'</span><span class="plain">;</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\</span><span class="plain">x</span><span class="character">0d'</span><span class="plain">) </span><span class="identifier">tfp</span><span class="element">.skip_terminator</span><span class="plain"> = </span><span class="character">'\</span><span class="plain">x</span><span class="character">0a'</span><span class="plain">;</span>
|
||
|
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="identifier">tfp</span><span class="element">.skip_terminator</span><span class="plain"> = </span><span class="character">'X'</span><span class="plain">;</span>
|
||
|
<<span class="cwebmacro">Update the text file position</span> <span class="cwebmacronumber">5.3.2</span>><span class="plain">;</span>
|
||
|
<span class="identifier">i</span><span class="plain"> = 0;</span>
|
||
|
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> {</span>
|
||
|
<span class="functiontext">Str::put_at</span><span class="plain">(</span><span class="identifier">line</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">++, (</span><span class="identifier">wchar_t</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
|
||
|
<span class="plain">}</span>
|
||
|
<span class="plain">}</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">i</span><span class="plain"> > 0) && (</span><span class="identifier">tfp</span><span class="element">.actively_scanning</span><span class="plain">))</span>
|
||
|
<<span class="cwebmacro">Feed the completed line to the iterator routine</span> <span class="cwebmacronumber">5.3.1</span>><span class="plain">;</span>
|
||
|
<span class="identifier">DISCARD_TEXT</span><span class="plain">(</span><span class="identifier">line</span><span class="plain">);</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">This code is used in <a href="#SP5">§5</a>.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP5_3_1"></a><b>§5.3.1. </b>We update the line counter only when a line is actually sent:
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<p class="macrodefinition"><code class="display">
|
||
|
<<span class="cwebmacrodefn">Feed the completed line to the iterator routine</span> <span class="cwebmacronumber">5.3.1</span>> =
|
||
|
</code></p>
|
||
|
|
||
|
|
||
|
<pre class="displaydefn">
|
||
|
<span class="identifier">iterator</span><span class="plain">(</span><span class="identifier">line</span><span class="plain">, &</span><span class="identifier">tfp</span><span class="plain">, </span><span class="identifier">state</span><span class="plain">);</span>
|
||
|
<span class="identifier">tfp</span><span class="element">.line_count</span><span class="plain">++;</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">This code is used in <a href="#SP5_3">§5.3</a> (twice).</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP5_3_2"></a><b>§5.3.2. </b>But we update the text file position after every apparent line terminator.
|
||
|
This is because we might otherwise, on a Windows text file, end up with an
|
||
|
<code class="display"><span class="extract">ftell</span></code> position in between the <code class="display"><span class="extract">CR</span></code> and the <code class="display"><span class="extract">LF</span></code>; if we resume at that point,
|
||
|
later on, we'll then have an off-by-one error in the line numbering in the
|
||
|
resumption as compared to during the original pass.
|
||
|
</p>
|
||
|
|
||
|
<p class="inwebparagraph">Properly speaking, <code class="display"><span class="extract">ftell</span></code> returns a long <code class="display"><span class="extract">int</span></code>, not an <code class="display"><span class="extract">int</span></code>, but on a
|
||
|
32-bit-or-more integer machine, this gives us room for files to run to 2GB.
|
||
|
Text files seldom come that large.
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<p class="macrodefinition"><code class="display">
|
||
|
<<span class="cwebmacrodefn">Update the text file position</span> <span class="cwebmacronumber">5.3.2</span>> =
|
||
|
</code></p>
|
||
|
|
||
|
|
||
|
<pre class="displaydefn">
|
||
|
<span class="identifier">tfp</span><span class="element">.line_position</span><span class="plain"> = (</span><span class="reserved">int</span><span class="plain">) (</span><span class="identifier">ftell</span><span class="plain">(</span><span class="identifier">tfp</span><span class="element">.handle_when_open</span><span class="plain">));</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">tfp</span><span class="element">.line_position</span><span class="plain"> == -1) {</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">serious</span><span class="plain">)</span>
|
||
|
<span class="functiontext">Errors::fatal_with_file</span><span class="plain">(</span><span class="string">"unable to determine position in file"</span><span class="plain">, </span><span class="identifier">F</span><span class="plain">);</span>
|
||
|
<span class="reserved">else</span>
|
||
|
<span class="functiontext">Errors::with_file</span><span class="plain">(</span><span class="string">"unable to determine position in file"</span><span class="plain">, </span><span class="identifier">F</span><span class="plain">);</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">This code is used in <a href="#SP5_3">§5.3</a>.</p>
|
||
|
|
||
|
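<p class="inwebparagraph"><a id="SP6"></a><b>§6. </b>The following reads a single line on demand from an already-open text file position, rather than feeding every line of the file to an iterator:</p>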
|
||
|
|
||
|
|
||
|
<pre class="display">
|
||
|
<span class="reserved">void</span><span class="plain"> </span><span class="functiontext">TextFiles::read_line</span><span class="plain">(</span><span class="constant">OUTPUT_STREAM</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">escape_oddities</span><span class="plain">, </span><span class="reserved">text_file_position</span><span class="plain"> *</span><span class="identifier">tfp</span><span class="plain">) {</span>
|
||
|
<span class="functiontext">Str::clear</span><span class="plain">(</span><span class="identifier">OUT</span><span class="plain">);</span>
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain"> = 0, </span><span class="identifier">c</span><span class="plain"> = </span><span class="character">' '</span><span class="plain">;</span>
|
||
|
<span class="reserved">while</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> != </span><span class="identifier">EOF</span><span class="plain">) && (</span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>actively_scanning</span><span class="plain">)) {</span>
|
||
|
<span class="identifier">c</span><span class="plain"> = </span><span class="functiontext">TextFiles::utf8_fgetc</span><span class="plain">(</span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>handle_when_open</span><span class="plain">, </span><span class="identifier">NULL</span><span class="plain">, </span><span class="identifier">escape_oddities</span><span class="plain">, &</span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>ufb</span><span class="plain">);</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == </span><span class="identifier">EOF</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\</span><span class="plain">x</span><span class="character">0a'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\</span><span class="plain">x</span><span class="character">0d'</span><span class="plain">)) {</span>
|
||
|
<span class="functiontext">Str::put_at</span><span class="plain">(</span><span class="identifier">OUT</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">, 0);</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">i</span><span class="plain"> > 0) || (</span><span class="identifier">c</span><span class="plain"> != </span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>skip_terminator</span><span class="plain">)) {</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\</span><span class="plain">x</span><span class="character">0a'</span><span class="plain">) </span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>skip_terminator</span><span class="plain"> = </span><span class="character">'\</span><span class="plain">x</span><span class="character">0d'</span><span class="plain">;</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\</span><span class="plain">x</span><span class="character">0d'</span><span class="plain">) </span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>skip_terminator</span><span class="plain"> = </span><span class="character">'\</span><span class="plain">x</span><span class="character">0a'</span><span class="plain">;</span>
|
||
|
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>skip_terminator</span><span class="plain"> = </span><span class="character">'X'</span><span class="plain">;</span>
|
||
|
<span class="identifier">tfp</span><span class="plain">-</span><span class="element">>line_position</span><span class="plain"> = (</span><span class="reserved">int</span><span class="plain">) (</span><span class="identifier">ftell</span><span class="plain">(</span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>handle_when_open</span><span class="plain">));</span>
|
||
|
<span class="identifier">i</span><span class="plain"> = 0;</span>
|
||
|
<span class="identifier">tfp</span><span class="plain">-</span><span class="element">>line_count</span><span class="plain">++; </span><span class="reserved">return</span><span class="plain">;</span>
|
||
|
<span class="plain">}</span>
|
||
|
<span class="functiontext">Str::put_at</span><span class="plain">(</span><span class="identifier">OUT</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">++, (</span><span class="identifier">wchar_t</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
|
||
|
<span class="plain">}</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">i</span><span class="plain"> > 0) && (</span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>actively_scanning</span><span class="plain">)) </span><span class="identifier">tfp</span><span class="plain">-</span><span class="element">>line_count</span><span class="plain">++;</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">The function TextFiles::read_line appears nowhere else.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP7"></a><b>§7. </b>The routine being iterated can indicate that it has had enough by
|
||
|
calling the following:
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<pre class="display">
|
||
|
<span class="reserved">void</span><span class="plain"> </span><span class="functiontext">TextFiles::lose_interest</span><span class="plain">(</span><span class="reserved">text_file_position</span><span class="plain"> *</span><span class="identifier">tfp</span><span class="plain">) {</span>
|
||
|
<span class="identifier">tfp</span><span class="plain">-</span><span class="element">>actively_scanning</span><span class="plain"> = </span><span class="constant">FALSE</span><span class="plain">;</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">The function TextFiles::lose_interest appears nowhere else.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP8"></a><b>§8. Reading UTF-8 files. </b>The following routine reads a sequence of Unicode characters from a UTF-8
|
||
|
encoded file, but returns them as a sequence of ISO Latin-1 characters, a
|
||
|
trick it can only pull off by escaping non-ISO characters. This is done by
|
||
|
taking character number <code class="display"><span class="extract">N</span></code> and feeding it out, one character at a time, as
|
||
|
the text <code class="display"><span class="extract">[unicode N]</span></code>, writing the number in decimal. Only one UTF-8
|
||
|
file like this will be read at any one time, and the routine will be
|
||
|
repeatedly called until <code class="display"><span class="extract">EOF</span></code> or a line division.
|
||
|
</p>
|
||
|
|
||
|
<p class="inwebparagraph">Strictly speaking, we transmit not as ISO Latin-1 but as that subset of ISO
|
||
|
Latin-1 whose characters have corresponding (different) codes in the ZSCII character set. This
|
||
|
excludes some typewriter symbols and a handful of letterforms, as we shall
|
||
|
see.
|
||
|
</p>
|
||
|
|
||
|
<p class="inwebparagraph">There are two exceptions: <code class="display"><span class="extract">TextFiles::utf8_fgetc</span></code> can also return the usual C
|
||
|
end-of-file pseudo-character <code class="display"><span class="extract">EOF</span></code>, and it can also return the Unicode BOM
|
||
|
(byte-ordering marker) pseudo-character, which is legal at the start of a
|
||
|
file and which is automatically prepended by some text editors and
|
||
|
word-processors when they save a UTF-8 file (though in fact it is not
|
||
|
required by the UTF-8 specification). Anyone calling <code class="display"><span class="extract">TextFiles::utf8_fgetc</span></code> must
|
||
|
check the return value for <code class="display"><span class="extract">EOF</span></code> every time, and for <code class="display"><span class="extract">0xFEFF</span></code> every time we
|
||
|
might be at the start of the file being read.
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<pre class="display">
|
||
|
<span class="reserved">typedef</span><span class="plain"> </span><span class="reserved">struct</span><span class="plain"> </span><span class="reserved">unicode_file_buffer</span><span class="plain"> {</span>
|
||
|
<span class="reserved">char</span><span class="plain"> </span><span class="identifier">unicode_feed_buffer</span><span class="plain">[32]; </span> <span class="comment">holds a single escape such as "[unicode 3106]"</span>
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">ufb_counter</span><span class="plain">; </span> <span class="comment">position in the unicode feed buffer</span>
|
||
|
<span class="plain">} </span><span class="reserved">unicode_file_buffer</span><span class="plain">;</span>
|
||
|
|
||
|
<span class="reserved">unicode_file_buffer</span><span class="plain"> </span><span class="functiontext">TextFiles::create_ufb</span><span class="plain">(</span><span class="reserved">void</span><span class="plain">) {</span>
|
||
|
<span class="reserved">unicode_file_buffer</span><span class="plain"> </span><span class="identifier">ufb</span><span class="plain">;</span>
|
||
|
<span class="identifier">ufb</span><span class="element">.ufb_counter</span><span class="plain"> = -1;</span>
|
||
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">ufb</span><span class="plain">;</span>
|
||
|
<span class="plain">}</span>
|
||
|
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">TextFiles::utf8_fgetc</span><span class="plain">(</span><span class="reserved">FILE</span><span class="plain"> *</span><span class="identifier">from</span><span class="plain">, </span><span class="reserved">char</span><span class="plain"> **</span><span class="identifier">or_from</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">escape_oddities</span><span class="plain">,</span>
|
||
|
<span class="reserved">unicode_file_buffer</span><span class="plain"> *</span><span class="identifier">ufb</span><span class="plain">) {</span>
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">c</span><span class="plain"> = </span><span class="identifier">EOF</span><span class="plain">, </span><span class="identifier">conts</span><span class="plain">;</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">ufb</span><span class="plain">) && (</span><span class="identifier">ufb</span><span class="plain">-</span><span class="element">>ufb_counter</span><span class="plain"> >= 0)) {</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">ufb</span><span class="plain">-</span><span class="element">>unicode_feed_buffer</span><span class="plain">[</span><span class="identifier">ufb</span><span class="plain">-</span><span class="element">>ufb_counter</span><span class="plain">] == 0) </span><span class="identifier">ufb</span><span class="plain">-</span><span class="element">>ufb_counter</span><span class="plain"> = -1;</span>
|
||
|
<span class="reserved">else</span><span class="plain"> </span><span class="reserved">return</span><span class="plain"> </span><span class="identifier">ufb</span><span class="plain">-</span><span class="element">>unicode_feed_buffer</span><span class="plain">[</span><span class="identifier">ufb</span><span class="plain">-</span><span class="element">>ufb_counter</span><span class="plain">++];</span>
|
||
|
<span class="plain">}</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">from</span><span class="plain">) </span><span class="identifier">c</span><span class="plain"> = </span><span class="identifier">fgetc</span><span class="plain">(</span><span class="identifier">from</span><span class="plain">); </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">or_from</span><span class="plain">) </span><span class="identifier">c</span><span class="plain"> = ((</span><span class="reserved">unsigned</span><span class="plain"> </span><span class="reserved">char</span><span class="plain">) *((*</span><span class="identifier">or_from</span><span class="plain">)++));</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="identifier">EOF</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">; </span> <span class="comment">ruling out EOF leaves a genuine byte from the file</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"><0</span><span class="identifier">x80</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">; </span> <span class="comment">in all other cases, a UTF-8 continuation sequence begins</span>
|
||
|
|
||
|
<<span class="cwebmacro">Unpack one to five continuation bytes to obtain the Unicode character code</span> <span class="cwebmacronumber">8.1</span>><span class="plain">;</span>
|
||
|
<<span class="cwebmacro">Return non-ASCII codes in the intersection of ISO Latin-1 and ZSCII as literals</span> <span class="cwebmacronumber">8.2</span>><span class="plain">;</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">escape_oddities</span><span class="plain">) </span><<span class="cwebmacro">Return Unicode fancy equivalents as simpler literals</span> <span class="cwebmacronumber">8.3</span>><span class="plain">;</span>
|
||
|
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == 0</span><span class="identifier">xFEFF</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">; </span> <span class="comment">the Unicode BOM non-character</span>
|
||
|
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">escape_oddities</span><span class="plain"> == </span><span class="constant">FALSE</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">;</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">ufb</span><span class="plain">) {</span>
|
||
|
<span class="identifier">sprintf</span><span class="plain">(</span><span class="identifier">ufb</span><span class="plain">-</span><span class="element">>unicode_feed_buffer</span><span class="plain">, </span><span class="string">"[unicode %d]"</span><span class="plain">, </span><span class="identifier">c</span><span class="plain">);</span>
|
||
|
<span class="identifier">ufb</span><span class="plain">-</span><span class="element">>ufb_counter</span><span class="plain"> = 1;</span>
|
||
|
<span class="reserved">return</span><span class="plain"> </span><span class="character">'['</span><span class="plain">;</span>
|
||
|
<span class="plain">}</span>
|
||
|
<span class="reserved">return</span><span class="plain"> </span><span class="character">'?'</span><span class="plain">;</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">The function TextFiles::create_ufb is used in <a href="#SP5">§5</a>, 2/str (<a href="2-str.html#SP28_2">§28.2</a>).</p>
|
||
|
|
||
|
<p class="endnote">The function TextFiles::utf8_fgetc is used in <a href="#SP5_3">§5.3</a>, <a href="#SP6">§6</a>, 2/str (<a href="2-str.html#SP28_2">§28.2</a>).</p>
|
||
|
|
||
|
<p class="endnote">The structure unicode_file_buffer is private to this section.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP8_1"></a><b>§8.1. </b>Not every byte sequence is legal in a UTF-8 file: if we find a malformed
|
||
|
continuation, we process it as a question mark rather than throwing a
|
||
|
fatal error (which is pretty well the only alternative here). The user
|
||
|
is likely to see problem messages later on which arise from the question
|
||
|
marks, and that will have to do.
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<p class="macrodefinition"><code class="display">
|
||
|
<<span class="cwebmacrodefn">Unpack one to five continuation bytes to obtain the Unicode character code</span> <span class="cwebmacronumber">8.1</span>> =
|
||
|
</code></p>
|
||
|
|
||
|
|
||
|
<pre class="displaydefn">
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"><0</span><span class="identifier">xC0</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> </span><span class="character">'?'</span><span class="plain">; </span> <span class="comment">malformed UTF-8</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"><0</span><span class="identifier">xE0</span><span class="plain">) { </span><span class="identifier">c</span><span class="plain"> = </span><span class="identifier">c</span><span class="plain"> & 0</span><span class="identifier">x1f</span><span class="plain">; </span><span class="identifier">conts</span><span class="plain"> = 1; }</span>
|
||
|
<span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"><0</span><span class="identifier">xF0</span><span class="plain">) { </span><span class="identifier">c</span><span class="plain"> = </span><span class="identifier">c</span><span class="plain"> & 0</span><span class="identifier">xf</span><span class="plain">; </span><span class="identifier">conts</span><span class="plain"> = 2; }</span>
|
||
|
<span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"><0</span><span class="identifier">xF8</span><span class="plain">) { </span><span class="identifier">c</span><span class="plain"> = </span><span class="identifier">c</span><span class="plain"> & 0</span><span class="identifier">x7</span><span class="plain">; </span><span class="identifier">conts</span><span class="plain"> = 3; }</span>
|
||
|
<span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"><0</span><span class="identifier">xFC</span><span class="plain">) { </span><span class="identifier">c</span><span class="plain"> = </span><span class="identifier">c</span><span class="plain"> & 0</span><span class="identifier">x3</span><span class="plain">; </span><span class="identifier">conts</span><span class="plain"> = 4; }</span>
|
||
|
<span class="reserved">else</span><span class="plain"> { </span><span class="identifier">c</span><span class="plain"> = </span><span class="identifier">c</span><span class="plain"> & 0</span><span class="identifier">x1</span><span class="plain">; </span><span class="identifier">conts</span><span class="plain"> = 5; }</span>
|
||
|
<span class="reserved">while</span><span class="plain"> (</span><span class="identifier">conts</span><span class="plain"> > 0) {</span>
|
||
|
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">d</span><span class="plain"> = </span><span class="identifier">EOF</span><span class="plain">;</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">from</span><span class="plain">) </span><span class="identifier">d</span><span class="plain"> = </span><span class="identifier">fgetc</span><span class="plain">(</span><span class="identifier">from</span><span class="plain">); </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">or_from</span><span class="plain">) </span><span class="identifier">d</span><span class="plain"> = ((</span><span class="reserved">unsigned</span><span class="plain"> </span><span class="reserved">char</span><span class="plain">) *((*</span><span class="identifier">or_from</span><span class="plain">)++));</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">d</span><span class="plain"> == </span><span class="identifier">EOF</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> </span><span class="character">'?'</span><span class="plain">; </span> <span class="comment">malformed UTF-8</span>
|
||
|
<span class="identifier">c</span><span class="plain"> = </span><span class="identifier">c</span><span class="plain"> << 6;</span>
|
||
|
<span class="identifier">c</span><span class="plain"> = </span><span class="identifier">c</span><span class="plain"> + (</span><span class="identifier">d</span><span class="plain"> & 0</span><span class="identifier">x3F</span><span class="plain">);</span>
|
||
|
<span class="identifier">conts</span><span class="plain">--;</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">This code is used in <a href="#SP8">§8</a>.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP8_2"></a><b>§8.2. </b>For the ZSCII character set, see "The Inform 6 Designer's Manual", or
|
||
|
"The Z-Machine Standards Document". It offers a range of west European
|
||
|
accented letters which almost, but not quite, matches those on offer in
|
||
|
ISO Latin-1 — it omits for example Icelandic lower case eth. (ZSCII was
|
||
|
developed in the 1980s by Infocom, Inc., to encode their interactive
|
||
|
fiction offerings. Had they been collaborating with J. R. R. Tolkien
|
||
|
rather than Douglas Adams, they might have filled this gap. As it was,
|
||
|
"eth" never occurred in any of their works.)
|
||
|
</p>
|
||
|
|
||
|
<p class="inwebparagraph">We let the multiplication sign <code class="display"><span class="extract">0xd7</span></code> through even though ZSCII doesn't
|
||
|
support it, but convert it to an "x": this is so that we can parse numbers
|
||
|
in scientific notation.
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<p class="macrodefinition"><code class="display">
|
||
|
<<span class="cwebmacrodefn">Return non-ASCII codes in the intersection of ISO Latin-1 and ZSCII as literals</span> <span class="cwebmacronumber">8.2</span>> =
|
||
|
</code></p>
|
||
|
|
||
|
|
||
|
<pre class="displaydefn">
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == 0</span><span class="identifier">xa1</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == 0</span><span class="identifier">xa3</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == 0</span><span class="identifier">xbf</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">; </span> <span class="comment">pound sign, inverted ! and ?</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == 0</span><span class="identifier">xd7</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> </span><span class="character">'x'</span><span class="plain">; </span> <span class="comment">convert multiplication sign to lower case "x"</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> >= 0</span><span class="identifier">xc0</span><span class="plain">) && (</span><span class="identifier">c</span><span class="plain"> <= 0</span><span class="identifier">xff</span><span class="plain">)) { </span> <span class="comment">accented West European letters, but...</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> != 0</span><span class="identifier">xd0</span><span class="plain">) && (</span><span class="identifier">c</span><span class="plain"> != 0</span><span class="identifier">xf0</span><span class="plain">) && </span> <span class="comment">not Icelandic eths</span>
|
||
|
<span class="plain">(</span><span class="identifier">c</span><span class="plain"> != 0</span><span class="identifier">xde</span><span class="plain">) && (</span><span class="identifier">c</span><span class="plain"> != 0</span><span class="identifier">xfe</span><span class="plain">) && </span> <span class="comment">nor Icelandic thorns</span>
|
||
|
<span class="plain">(</span><span class="identifier">c</span><span class="plain"> != 0</span><span class="identifier">xf7</span><span class="plain">)) </span> <span class="comment">nor division signs</span>
|
||
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">;</span>
|
||
|
<span class="plain">}</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">This code is used in <a href="#SP8">§8</a>.</p>
|
||
|
|
||
|
<p class="inwebparagraph"><a id="SP8_3"></a><b>§8.3. </b>We err on the safe side, accepting em-rules and non-breaking spaces, etc.,
|
||
|
where we would normally expect hyphens and ordinary spaces: this is intended
|
||
|
for the benefit of users with helpful word-processors which autocorrect
|
||
|
hyphens into em-rules when they are flanked by spaces, and so on.
|
||
|
</p>
|
||
|
|
||
|
|
||
|
<p class="macrodefinition"><code class="display">
|
||
|
<<span class="cwebmacrodefn">Return Unicode fancy equivalents as simpler literals</span> <span class="cwebmacronumber">8.3</span>> =
|
||
|
</code></p>
|
||
|
|
||
|
|
||
|
<pre class="displaydefn">
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == 0</span><span class="identifier">x85</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> </span><span class="character">'\</span><span class="plain">x</span><span class="character">0d'</span><span class="plain">; </span> <span class="comment">NEL, or "next line"</span>
|
||
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == 0</span><span class="identifier">xa0</span><span class="plain">) </span><span class="reserved">return</span><span class="plain"> </span><span class="character">' '</span><span class="plain">; </span> <span class="comment">non-breaking space</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> >= 0</span><span class="identifier">x2000</span><span class="plain">) && (</span><span class="identifier">c</span><span class="plain"> <= 0</span><span class="identifier">x200a</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="character">' '</span><span class="plain">; </span> <span class="comment">space variants</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> >= 0</span><span class="identifier">x2010</span><span class="plain">) && (</span><span class="identifier">c</span><span class="plain"> <= 0</span><span class="identifier">x2014</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="character">'-'</span><span class="plain">; </span> <span class="comment">rules and dashes</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> >= 0</span><span class="identifier">x2018</span><span class="plain">) && (</span><span class="identifier">c</span><span class="plain"> <= 0</span><span class="identifier">x2019</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="character">'\</span><span class="plain">'</span><span class="character">'</span><span class="plain">; </span> <span class="comment">smart single quotes</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> >= 0</span><span class="identifier">x201c</span><span class="plain">) && (</span><span class="identifier">c</span><span class="plain"> <= 0</span><span class="identifier">x201d</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="character">'"'</span><span class="plain">; </span> <span class="comment">smart double quotes</span>
|
||
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> >= 0</span><span class="identifier">x2028</span><span class="plain">) && (</span><span class="identifier">c</span><span class="plain"> <= 0</span><span class="identifier">x2029</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="character">'\</span><span class="plain">x</span><span class="character">0d'</span><span class="plain">; </span> <span class="comment">fancy newlines</span>
|
||
|
</pre>
|
||
|
|
||
|
<p class="inwebparagraph"></p>
|
||
|
|
||
|
<p class="endnote">This code is used in <a href="#SP8">§8</a>.</p>
|
||
|
|
||
|
<!--End of weave: 305 lines from a web of 9228-->
|
||
|
</body>
|
||
|
</html>
|
||
|
|