<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
|
<html lang="en-GB">
|
|
<head>
|
|
<title>Characters</title>
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
|
<meta http-equiv="Content-Language" content="en-gb">
|
|
<link href="../inweb.css" rel="stylesheet" type="text/css">
|
|
</head>
|
|
<body>
|
|
<nav role="navigation">
|
|
<h1><a href="../webs.html">Sources</a></h1>
|
|
<ul>
|
|
<li><a href="../inweb/index.html">inweb</a></li>
|
|
</ul>
|
|
<h2>Foundation</h2>
|
|
<ul>
|
|
<li><a href="../foundation-module/index.html">foundation-module</a></li>
|
|
<li><a href="../foundation-test/index.html">foundation-test</a></li>
|
|
</ul>
|
|
|
|
|
|
</nav>
|
|
<main role="main">
|
|
|
|
<!--Weave of 'Characters' generated by 7-->
|
|
<ul class="crumbs"><li><a href="../webs.html">Source</a></li><li><a href="index.html">foundation</a></li><li><a href="index.html#4">Chapter 4: Text Handling</a></li><li><b>Characters</b></li></ul><p class="purpose">Individual characters.</p>
|
|
|
|
<ul class="toc"><li><a href="#SP1">§1. Character classes</a></li><li><a href="#SP4">§4. Unicode composition</a></li><li><a href="#SP5">§5. Accent stripping</a></li></ul><hr class="tocbar">
|
|
|
|
<p class="inwebparagraph"><a id="SP1"></a><b>§1. Character classes. </b></p>
|
|
|
|
<pre class="display">
|
|
<span class="identifier">wchar_t</span><span class="plain"> </span><span class="functiontext">Characters::tolower</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">return</span><span class="plain"> (</span><span class="identifier">wchar_t</span><span class="plain">) </span><span class="identifier">tolower</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
|
|
<span class="plain">}</span>
|
|
<span class="identifier">wchar_t</span><span class="plain"> </span><span class="functiontext">Characters::toupper</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">return</span><span class="plain"> (</span><span class="identifier">wchar_t</span><span class="plain">) </span><span class="identifier">toupper</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::isalpha</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">isalpha</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::isdigit</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">isdigit</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::isupper</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">isupper</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::islower</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">islower</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::isalnum</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">isalnum</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::vowel</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'a'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'e'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'i'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'o'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'u'</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">TRUE</span><span class="plain">;</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
</pre>
|
|
|
|
<p class="inwebparagraph"></p>
|
|
|
|
<p class="endnote">The function Characters::tolower is used in 3/fln (<a href="3-fln.html#SP9">§9</a>), 4/sm (<a href="4-sm.html#SP25">§25</a>), 4/taa (<a href="4-taa.html#SP2">§2</a>).</p>
|
|
|
|
<p class="endnote">The function Characters::toupper appears nowhere else.</p>
|
|
|
|
<p class="endnote">The function Characters::isalpha is used in 2/wal (<a href="2-wal.html#SP5">§5</a>).</p>
|
|
|
|
<p class="endnote">The function Characters::isdigit is used in 2/wal (<a href="2-wal.html#SP5">§5</a>), 3/fln (<a href="3-fln.html#SP9">§9</a>), 7/vn (<a href="7-vn.html#SP7">§7</a>, <a href="7-vn.html#SP10">§10</a>).</p>
|
|
|
|
<p class="endnote">The function Characters::isupper appears nowhere else.</p>
|
|
|
|
<p class="endnote">The function Characters::islower appears nowhere else.</p>
|
|
|
|
<p class="endnote">The function Characters::isalnum appears nowhere else.</p>
|
|
|
|
<p class="endnote">The function Characters::vowel appears nowhere else.</p>
|
|
|
|
<p class="inwebparagraph"><a id="SP2"></a><b>§2. </b>White space classes:
|
|
</p>
|
|
|
|
<pre class="display">
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::is_space_or_tab</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">' '</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\t'</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">TRUE</span><span class="plain">;</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::is_whitespace</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">' '</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\t'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\n'</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">TRUE</span><span class="plain">;</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
</pre>
|
|
|
|
<p class="inwebparagraph"></p>
|
|
|
|
<p class="endnote">The function Characters::is_space_or_tab is used in 2/dl (<a href="2-dl.html#SP4_2">§4.2</a>), 4/sm (<a href="4-sm.html#SP22">§22</a>, <a href="4-sm.html#SP23">§23</a>), 4/pm (<a href="4-pm.html#SP13">§13</a>).</p>
|
|
|
|
<p class="endnote">The function Characters::is_whitespace is used in 8/ws (<a href="8-ws.html#SP7">§7</a>).</p>
|
|
|
|
<p class="inwebparagraph"><a id="SP3"></a><b>§3. </b>These are all the characters which would come out as whitespace in the
|
|
sense of the Treaty of Babel rules on leading and trailing spaces in
|
|
iFiction records.
|
|
</p>
|
|
|
|
<pre class="display">
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::is_babel_whitespace</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
|
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">' '</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\t'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\x0a'</span><span class="plain">)</span>
|
|
<span class="plain">|| (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\x0d'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="constant">NEWLINE_IN_STRING</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">TRUE</span><span class="plain">;</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
</pre>
|
|
|
|
<p class="inwebparagraph"></p>
|
|
|
|
<p class="endnote">The function Characters::is_babel_whitespace is used in 4/sm (<a href="4-sm.html#SP23">§23</a>).</p>
|
|
|
|
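<p class="inwebparagraph"><a id="SP4"></a><b>§4. Unicode composition. </b>A routine which combines a Unicode combining accent with a base letter,
returning the code of the corresponding precomposed accented character. It is
intended to cover enough cases to handle the accented characters in the ZSCII
set; if the pairing of accent and letter is not recognised, a question mark is
returned instead.
</p>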
|
|
|
|
<pre class="display">
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::combine_accent</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">accent</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">letter</span><span class="plain">) {</span>
|
|
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">accent</span><span class="plain">) {</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0300</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining grave</span>
|
|
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'a'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE0</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'e'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE8</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'i'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xEC</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'o'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF2</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'u'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF9</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'A'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC0</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'E'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC8</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'I'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCC</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'O'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD2</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'U'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD9</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0301</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining acute</span>
|
|
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'a'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE1</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'e'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE9</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'i'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xED</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'o'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF3</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'u'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xFA</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'y'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xFD</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'A'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC1</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'E'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC9</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'I'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCD</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'O'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD3</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'U'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xDA</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0302</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining circumflex</span>
|
|
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'a'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE2</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'e'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xEA</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'i'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xEE</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'o'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF4</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'u'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xFB</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'A'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC2</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'E'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCA</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'I'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCE</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'O'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD4</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'U'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xDB</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0303</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining tilde</span>
|
|
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'a'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE3</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'n'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF1</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'o'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF5</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'A'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC3</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'N'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD1</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'O'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD5</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0308</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining diaeresis</span>
|
|
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'a'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE4</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'e'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xEB</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'u'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xFC</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'o'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF6</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'i'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xEF</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'A'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC4</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'E'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCB</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'U'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xDC</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'O'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD6</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'I'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCF</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0327</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining cedilla</span>
|
|
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="character">'c'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE7</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'C'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC7</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="character">'?'</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
</pre>
|
|
|
|
<p class="inwebparagraph"></p>
|
|
|
|
<p class="endnote">The function Characters::combine_accent is used in 2/str (<a href="2-str.html#SP35">§35</a>).</p>
|
|
|
|
<p class="inwebparagraph"><a id="SP5"></a><b>§5. Accent stripping. </b>It's occasionally useful to simplify text used as a filename by removing
the more obvious accents from it. Any character code of 128 or above which
still remains after that is replaced by a hyphen.
</p>
|
|
|
|
<pre class="display">
|
|
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::make_filename_safe</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain">) {</span>
|
|
<span class="identifier">charcode</span><span class="plain"> = </span><span class="functiontext">Characters::remove_accent</span><span class="plain">(</span><span class="identifier">charcode</span><span class="plain">);</span>
|
|
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">charcode</span><span class="plain"> >= </span><span class="constant">128</span><span class="plain">) </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'-'</span><span class="plain">;</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
</pre>
|
|
|
|
<p class="inwebparagraph"></p>
|
|
|
|
<p class="endnote">The function Characters::make_filename_safe appears nowhere else.</p>
|
|
|
|
<p class="inwebparagraph"><a id="SP6"></a><b>§6. </b>The following strips the accent, if present, from an ISO Latin-1 character:
|
|
</p>
|
|
|
|
<pre class="display">
|
|
<!--
  Woven listing of the C function Characters::remove_accent(int charcode).
  Purpose: replace an accented letter code with an unaccented ASCII letter.
  Parameter: charcode, an integer character code (the case values match the
    accented letters of ISO 8859-1 / Unicode Latin-1; assumed, confirm against callers).
  Returns: 'A', 'a', 'E', 'e', 'I', 'i', 'O', 'o', 'U', 'u', 'Y', 'y', 'N', 'n',
    'C' or 'c' for the listed codes, 's' for 0xDF, and charcode unchanged for
    every value that has no case in the switch.
  0xD7 and 0xF7 are absent from the O/o groups (in Latin-1 they are the
    multiplication and division signs, not letters).
  NOTE(review): 0xFF has no case and is returned unchanged although 0xDD and 0xFD
    are mapped; confirm whether that is intended. This page is generated by the
    weaver, so any correction belongs in the literate source, not in this markup.
--><span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::remove_accent</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain">) {</span>
|
|
<span class="reserved">switch</span><span class="plain"> (</span><span class="identifier">charcode</span><span class="plain">) {</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC0</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC1</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC2</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC3</span><span class="identifier">:</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC4</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC5</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'A'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE0</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE1</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE2</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE3</span><span class="identifier">:</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE4</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE5</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'a'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC8</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC9</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCA</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCB</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'E'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE8</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE9</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xEA</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xEB</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'e'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCC</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCD</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCE</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCF</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'I'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xEC</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xED</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xEE</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xEF</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'i'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD2</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD3</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD4</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD5</span><span class="identifier">:</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD6</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD8</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'O'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF2</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF3</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF4</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF5</span><span class="identifier">:</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF6</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF8</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'o'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD9</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xDA</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xDB</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xDC</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'U'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF9</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xFA</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xFB</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xFC</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'u'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xDD</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'Y'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xFD</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'y'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD1</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'N'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF1</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'n'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC7</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'C'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE7</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'c'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xDF</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'s'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain">;</span>
|
|
<span class="plain">}</span>
|
|
</pre>
|
|
|
|
<p class="inwebparagraph"></p>
|
|
|
|
<p class="endnote">The function Characters::remove_accent is used in <a href="#SP5">§5</a>.</p>
|
|
|
|
<hr class="tocbar">
|
|
<ul class="toc"><li><i>(This section begins Chapter 4: Text Handling.)</i></li><li><a href="4-cst.html">Continue with 'C Strings'</a></li></ul><hr class="tocbar">
|
|
<!--End of weave-->
|
|
</main>
|
|
</body>
|
|
</html>
|
|
|