inweb-bootstrap/docs/foundation-module/4-chr.html
2020-04-08 23:41:00 +01:00

233 lines
43 KiB
HTML

<!DOCTYPE html>
<html lang="en-GB">
<head>
  <!-- Encoding is declared first so it is known before any text is parsed. -->
  <meta charset="utf-8">
  <title>Characters</title>
  <!-- Viewport properties form a comma-separated list. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <!-- The page language is carried by the lang attribute on the html element
       rather than by a Content-Language meta. -->
  <link rel="stylesheet" href="../inweb.css">
</head>
<body>
<!-- Sidebar: a link to the list of webs, then links to the Foundation pages. -->
<nav role="navigation">
  <h1><a href="../webs.html">Sources</a></h1>
  <ul>
    <li><a href="../inweb/index.html">inweb</a></li>
  </ul>
  <h2>Foundation</h2>
  <ul>
    <li><a href="../foundation-module/index.html">foundation-module</a></li>
    <li><a href="../foundation-test/index.html">foundation-test</a></li>
  </ul>
</nav>
<main role="main">
<!--Weave of 'Characters' generated by 7-->
<!-- Breadcrumb trail followed by the section's one-line purpose. NOTE(review): the first crumb is labelled "Source" while the sidebar heading linking to the same ../webs.html reads "Sources"; confirm which label is intended. -->
<ul class="crumbs"><li><a href="../webs.html">Source</a></li><li><a href="index.html">foundation</a></li><li><a href="index.html#4">Chapter 4: Text Handling</a></li><li><b>Characters</b></li></ul><p class="purpose">Individual characters.</p>
<!-- In-page table of contents linking to the SP1, SP4 and SP5 anchors further down. -->
<ul class="toc"><li><a href="#SP1">&#167;1. Character classes</a></li><li><a href="#SP4">&#167;4. Unicode composition</a></li><li><a href="#SP5">&#167;5. Accent stripping</a></li></ul><hr class="tocbar">
<p class="inwebparagraph"><a id="SP1"></a><b>&#167;1. Character classes. </b></p>
<!-- Listing for paragraph 1. Characters::tolower, toupper, isalpha, isdigit, isupper, islower and isalnum each cast their wchar_t argument to int and call the C function of the same name; Characters::vowel returns TRUE only for lower-case a, e, i, o, u. NOTE(review): if those callees are the ctype.h functions, their argument must be representable as unsigned char (or be EOF), so wide characters above that range are outside their contract; the wctype.h counterparts (towlower, iswalpha and so on) would be the safe choice. Any fix belongs in the source web, because this page is generated. -->
<pre class="display">
<span class="identifier">wchar_t</span><span class="plain"> </span><span class="functiontext">Characters::tolower</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">return</span><span class="plain"> (</span><span class="identifier">wchar_t</span><span class="plain">) </span><span class="identifier">tolower</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="identifier">wchar_t</span><span class="plain"> </span><span class="functiontext">Characters::toupper</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">return</span><span class="plain"> (</span><span class="identifier">wchar_t</span><span class="plain">) </span><span class="identifier">toupper</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::isalpha</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">isalpha</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::isdigit</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">isdigit</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::isupper</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">isupper</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::islower</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">islower</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::isalnum</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">isalnum</span><span class="plain">((</span><span class="reserved">int</span><span class="plain">) </span><span class="identifier">c</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::vowel</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'a'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'e'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'i'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'o'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'u'</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">TRUE</span><span class="plain">;</span>
<span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
<span class="plain">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="endnote">The function Characters::tolower is used in 3/fln (<a href="3-fln.html#SP9">&#167;9</a>), 4/sm (<a href="4-sm.html#SP25">&#167;25</a>), 4/taa (<a href="4-taa.html#SP2">&#167;2</a>).</p>
<p class="endnote">The function Characters::toupper appears nowhere else.</p>
<p class="endnote">The function Characters::isalpha is used in 2/wal (<a href="2-wal.html#SP5">&#167;5</a>).</p>
<p class="endnote">The function Characters::isdigit is used in 2/wal (<a href="2-wal.html#SP5">&#167;5</a>), 3/fln (<a href="3-fln.html#SP9">&#167;9</a>), 7/vn (<a href="7-vn.html#SP7">&#167;7</a>, <a href="7-vn.html#SP10">&#167;10</a>).</p>
<p class="endnote">The function Characters::isupper appears nowhere else.</p>
<p class="endnote">The function Characters::islower appears nowhere else.</p>
<p class="endnote">The function Characters::isalnum appears nowhere else.</p>
<p class="endnote">The function Characters::vowel appears nowhere else.</p>
<p class="inwebparagraph"><a id="SP2"></a><b>&#167;2. </b>White space classes:
</p>
<!-- Listing for paragraph 2. Characters::is_space_or_tab returns TRUE for a space or a tab; Characters::is_whitespace returns TRUE for a space, a tab or a newline. Both return FALSE for anything else. -->
<pre class="display">
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::is_space_or_tab</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">' '</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\t'</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">TRUE</span><span class="plain">;</span>
<span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::is_whitespace</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">' '</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\t'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\n'</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">TRUE</span><span class="plain">;</span>
<span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
<span class="plain">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="endnote">The function Characters::is_space_or_tab is used in 2/dl (<a href="2-dl.html#SP4_2">&#167;4.2</a>), 4/sm (<a href="4-sm.html#SP22">&#167;22</a>, <a href="4-sm.html#SP23">&#167;23</a>), 4/pm (<a href="4-pm.html#SP13">&#167;13</a>).</p>
<p class="endnote">The function Characters::is_whitespace is used in 8/ws (<a href="8-ws.html#SP7">&#167;7</a>).</p>
<p class="inwebparagraph"><a id="SP3"></a><b>&#167;3. </b>These are all the characters which would come out as whitespace in the
sense of the Treaty of Babel rules on leading and trailing spaces in
iFiction records.
</p>
<!-- Listing for paragraph 3. Characters::is_babel_whitespace returns TRUE for a space, a tab, the codes 0x0a and 0x0d, or the value NEWLINE_IN_STRING (a constant not defined on this page), and FALSE otherwise. -->
<pre class="display">
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::is_babel_whitespace</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">' '</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\t'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\x0a'</span><span class="plain">)</span>
<span class="plain">|| (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\x0d'</span><span class="plain">) || (</span><span class="identifier">c</span><span class="plain"> == </span><span class="constant">NEWLINE_IN_STRING</span><span class="plain">)) </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">TRUE</span><span class="plain">;</span>
<span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
<span class="plain">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="endnote">The function Characters::is_babel_whitespace is used in 4/sm (<a href="4-sm.html#SP23">&#167;23</a>).</p>
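<p class="inwebparagraph"><a id="SP4"></a><b>&#167;4. Unicode composition. </b>A routine which combines a Unicode combining accent with a base letter to give
the single accented character, intended to be sufficient to handle all the accented
characters in the ZSCII set correctly. Unrecognised combinations produce a question mark.
</p>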
<!-- Listing for paragraph 4. Characters::combine_accent switches first on the combining accent (0x0300 grave, 0x0301 acute, 0x0302 circumflex, 0x0303 tilde, 0x0308 diaeresis, 0x0327 cedilla) and then on the base letter, returning the code of the accented letter; any pair not listed returns '?'. NOTE(review): under the acute accent 'y' is mapped to 0xFF, but in ISO Latin-1 that code is y with diaeresis and y with acute is 0xFD. There are also no cases for 'Y' with acute, 'y' with diaeresis, or a combining ring above, so those pairs return '?'; whether the ZSCII set needs them should be confirmed. Any fix belongs in the source web, because this page is generated. -->
<pre class="display">
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::combine_accent</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">accent</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">letter</span><span class="plain">) {</span>
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">accent</span><span class="plain">) {</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0300</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining grave</span>
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'a'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE0</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'e'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE8</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'i'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xEC</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'o'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF2</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'u'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF9</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'A'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC0</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'E'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC8</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'I'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCC</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'O'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD2</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'U'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD9</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0301</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining acute</span>
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'a'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE1</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'e'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE9</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'i'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xED</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'o'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF3</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'u'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xFA</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'y'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xFF</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'A'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC1</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'E'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC9</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'I'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCD</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'O'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD3</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'U'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xDA</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0302</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining circumflex</span>
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'a'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE2</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'e'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xEA</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'i'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xEE</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'o'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF4</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'u'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xFB</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'A'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC2</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'E'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCA</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'I'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCE</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'O'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD4</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'U'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xDB</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0303</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining tilde</span>
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'a'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE3</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'n'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF1</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'o'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF5</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'A'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC3</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'N'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD1</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'O'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD5</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0308</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining diaeresis</span>
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'a'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE4</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'e'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xEB</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'u'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xFC</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'o'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xF6</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'i'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xEF</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'A'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC4</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'E'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCB</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'U'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xDC</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'O'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xD6</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'I'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xCF</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0x0327</span><span class="identifier">:</span><span class="plain"> </span><span class="comment"> Unicode combining cedilla</span>
<span class="reserved">switch</span><span class="plain">(</span><span class="identifier">letter</span><span class="plain">) {</span>
<span class="reserved">case</span><span class="plain"> </span><span class="character">'c'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xE7</span><span class="plain">; </span><span class="reserved">case</span><span class="plain"> </span><span class="character">'C'</span><span class="plain">: </span><span class="reserved">return</span><span class="plain"> </span><span class="constant">0xC7</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">break</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">return</span><span class="plain"> </span><span class="character">'?'</span><span class="plain">;</span>
<span class="plain">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="endnote">The function Characters::combine_accent is used in 2/str (<a href="2-str.html#SP35">&#167;35</a>).</p>
<p class="inwebparagraph"><a id="SP5"></a><b>&#167;5. Accent stripping. </b>It's occasionally useful to simplify text used as a filename by removing
the more obvious accents from it. Any character whose code is still 128 or above
after that is replaced by a hyphen.
</p>
<!-- Listing for paragraph 5. Characters::make_filename_safe first passes the code through Characters::remove_accent, then replaces any result of 128 or above with '-', and returns the resulting code. -->
<pre class="display">
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::make_filename_safe</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain">) {</span>
<span class="identifier">charcode</span><span class="plain"> = </span><span class="functiontext">Characters::remove_accent</span><span class="plain">(</span><span class="identifier">charcode</span><span class="plain">);</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">charcode</span><span class="plain"> &gt;= </span><span class="constant">128</span><span class="plain">) </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'-'</span><span class="plain">;</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain">;</span>
<span class="plain">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="endnote">The function Characters::make_filename_safe appears nowhere else.</p>
<p class="inwebparagraph"><a id="SP6"></a><b>&#167;6. </b>The following strips the accent, if present, from an ISO Latin-1 character:
</p>
<pre class="display">
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Characters::remove_accent</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain">) {</span>
<span class="reserved">switch</span><span class="plain"> (</span><span class="identifier">charcode</span><span class="plain">) {</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC0</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC1</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC2</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC3</span><span class="identifier">:</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC4</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC5</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'A'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE0</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE1</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE2</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE3</span><span class="identifier">:</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE4</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE5</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'a'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC8</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC9</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCA</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCB</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'E'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE8</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE9</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xEA</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xEB</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'e'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCC</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCD</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCE</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xCF</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'I'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xEC</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xED</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xEE</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xEF</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'i'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD2</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD3</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD4</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD5</span><span class="identifier">:</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD6</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD8</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'O'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF2</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF3</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF4</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF5</span><span class="identifier">:</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF6</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF8</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'o'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD9</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xDA</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xDB</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xDC</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'U'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF9</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xFA</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xFB</span><span class="identifier">:</span><span class="plain"> </span><span class="reserved">case</span><span class="plain"> </span><span class="constant">0xFC</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'u'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xDD</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'Y'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xFD</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'y'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xD1</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'N'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xF1</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'n'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xC7</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'C'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xE7</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'c'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="reserved">case</span><span class="plain"> </span><span class="constant">0xDF</span><span class="identifier">:</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain"> = </span><span class="character">'s'</span><span class="plain">; </span><span class="reserved">break</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">charcode</span><span class="plain">;</span>
<span class="plain">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="endnote">The function Characters::remove_accent is used in <a href="#SP5">&#167;5</a>.</p>
<hr class="tocbar">
<ul class="toc"><li><i>(This section begins Chapter 4: Text Handling.)</i></li><li><a href="4-cst.html">Continue with 'C Strings'</a></li></ul><hr class="tocbar">
<!--End of weave-->
</main>
</body>
</html>