From b3ba94b5a62069ebaf6b069abe57ab42f077a0f3 Mon Sep 17 00:00:00 2001 From: AwesomeAdam54321 Date: Sat, 9 Mar 2024 09:53:47 +0800 Subject: [PATCH] foundation-module: Chapter 1: Nowebify. --- .../Chapter_1/Foundation_Module.nw | 108 +++++++++--------- .../Chapter_1/POSIX_Platforms.nw | 97 ++++++++++------ 2 files changed, 117 insertions(+), 88 deletions(-) diff --git a/foundation-module/Chapter_1/Foundation_Module.nw b/foundation-module/Chapter_1/Foundation_Module.nw index 590e804..101d462 100644 --- a/foundation-module/Chapter_1/Foundation_Module.nw +++ b/foundation-module/Chapter_1/Foundation_Module.nw @@ -2,7 +2,7 @@ Starting up and shutting down. -@h Introduction. +@ \section{Introduction.} The Foundation module supplies some of the conveniences of more modern programming languages to ANSI C. It offers the usual stuff of standard libraries everywhere: memory management, collection classes, filename @@ -16,27 +16,27 @@ language's compiler and outlying tools, including Inweb itself. If you need to write a command-line utility in ANSI C with no dependencies on other tools or libraries to speak of, you could do worse. -To use |foundation|, the Contents section of a web should include: -= (text) +To use [[foundation]], the Contents section of a web should include: + Import: foundation -= + before beginning the chapter rundown. There are then a few conventions -which must be followed. The |main| routine for the client should, as one -of its very first acts, call |Foundation::start()|, and should similarly, just -before it exits, call |Foundation::end()|. Any other module used should be +which must be followed. The [[main]] routine for the client should, as one +of its very first acts, call [[Foundation::start()]], and should similarly, just +before it exits, call [[Foundation::end()]]. Any other module used should be started after Foundation starts, and ended before Foundation ends. 
In addition, the client's source code needs to define a few symbols to indicate what it needs in the way of memory allocation. For an example, see the code for Inweb itself. -@h Basic definitions. +@ \section{Basic definitions.} These are all from the ANSI C standard library (or the pthread POSIX standard), which means that Inweb will tangle them up to the top of the C source code. Because pthread is not normally available on Windows, a special header is supplied instead for that case. -= (very early code) +<<*>>= #include #include #include @@ -47,65 +47,68 @@ supplied instead for that case. #include #include -@ = -text_stream *DL = NULL; /* Current destination of debugging text: kept |NULL| until opened */ +<<*>>= +text_stream *DL = NULL; /* Current destination of debugging text: kept [[NULL]] until opened */ @ We'll use three truth states, the third of which can also mean "unknown". -@d TRUE 1 -@d FALSE 0 -@d NOT_APPLICABLE 2 +<<*>>= +#define TRUE 1 +#define FALSE 0 +#define NOT_APPLICABLE 2 -@ And we recognise two different encodings for narrow (i.e., |char *|) C strings. +@ And we recognise two different encodings for narrow (i.e., [[char *]]) C strings. -@d UTF8_ENC 1 /* Write as UTF-8 without BOM */ -@d ISO_ENC 2 /* Write as ISO Latin-1 (i.e., no conversion needed) */ +<<*>>= +#define UTF8_ENC 1 /* Write as UTF-8 without BOM */ +#define ISO_ENC 2 /* Write as ISO Latin-1 (i.e., no conversion needed) */ @ It is assumed that our host filing system can manage at least 30-character filenames, that space is legal as a character in a filename, and that trailing -extensions can be longer than 3 characters (in particular, that |.html| is -allowed). There are no clear rules but on Windows |MAX_PATH| can be as low as +extensions can be longer than 3 characters (in particular, that [[.html]] is +allowed). 
There are no clear rules but on Windows [[MAX_PATH]] can be as low as 260, and on Mac OS X the equivalent limit is 1024; both systems can house files buried more deeply, but in both cases the user interface to the operating system fails to recognise them. Some Linux implementations raise the -equivalent |PATH_MAX| limit as high as 4096. This seems a reasonable +equivalent [[PATH_MAX]] limit as high as 4096. This seems a reasonable compromise in practice: -@d MAX_FILENAME_LENGTH 1025 +<<*>>= +#define MAX_FILENAME_LENGTH 1025 @ Very occasionally we'll store a pointer as data: -= +<<*>>= typedef long int pointer_sized_int; -@h The beginning and the end. +@ \section{The beginning and the end.} As noted above, the client needs to call these when starting up and when shutting down. -The Inweb notation |[[textliterals]]| inserts declarations of I-literals, -that is, literal |text_stream *| values written as |I"strings"|. It should +The Inweb notation [[[[textliterals]]]] inserts declarations of I-literals, +that is, literal [[text_stream *]] values written as [[I"strings"]]. It should never be used anywhere but here. -= +<<*>>= void Foundation::start(int argc, char **argv) { CommandLine::set_locale(argc, argv); Platform::configure_terminal(); Memory::start(); - @; + <>; [[textliterals]]; Time::begin(); Pathnames::start(); - @; - @; - @; + <>; + <>; + <>; } -@ After calling |Foundation::start()|, the client can register further stream +@ After calling [[Foundation::start()]], the client can register further stream writing routines, following these models: they define the meaning of escape -characters in |WRITE|, our version of formatted printing. |%f|, for example, -prints a filename by calling |Filenames::writer|. +characters in [[WRITE]], our version of formatted printing. [[%f]], for example, +prints a filename by calling [[Filenames::writer]]. 
-@ = +<>= Writers::register_writer('f', &Filenames::writer); Writers::register_writer('p', &Pathnames::writer); Writers::register_writer('v', &VersionNumbers::writer); @@ -116,38 +119,39 @@ switched on or off. Each aspect represents an activity of the program about which a narrative is printed, or not printed, to the debugging log file. The following are always provided, but are all off by default. -@ = +<>= Log::declare_aspect(DEBUGGING_LOG_INCLUSIONS_DA, L"debugging log inclusions", FALSE, FALSE); Log::declare_aspect(SHELL_USAGE_DA, L"shell usage", FALSE, FALSE); Log::declare_aspect(MEMORY_USAGE_DA, L"memory usage", FALSE, FALSE); Log::declare_aspect(TEXT_FILES_DA, L"text files", FALSE, FALSE); -@ Debugging log writers are similar to stream writers, but implement the |$| -escapes only available to the debugging log. For example, |$S| calls the -|Streams::log| function to print a textual representation of the current +@ Debugging log writers are similar to stream writers, but implement the [[$]] +escapes only available to the debugging log. For example, [[$S]] calls the +[[Streams::log]] function to print a textual representation of the current state of a stream. -@ = +<>= Writers::register_logger('a', &Tries::log_avinue); Writers::register_logger('S', &Streams::log); @ We provide an optional service for parsing the command line. By default, -the |-log A| switch makes that aspect active, though it's hyphenated, so -for example |-log memory-usage| or |-log no-memory-usage|. |-fixtime| is +the [[-log A]] switch makes that aspect active, though it's hyphenated, so +for example [[-log memory-usage]] or [[-log no-memory-usage]]. [[-fixtime]] is used to ease automated testing: we don't want to reject the output from some tool just because it contains today's date and not the date when the -test was set up. |-crash| tells the tool to crash on a fatal error, rather +test was set up. 
[[-crash]] tells the tool to crash on a fatal error, rather than to exit cleanly, to make it easier to diagnose in a debugger. -@e LOG_CLSW from 0 -@e VERSION_CLSW -@e CRASH_CLSW -@e HELP_CLSW -@e FIXTIME_CLSW -@e AT_CLSW -@e LOCALE_CLSW +<<*>>= +enum LOG_CLSW from 0 +enum VERSION_CLSW +enum CRASH_CLSW +enum HELP_CLSW +enum FIXTIME_CLSW +enum AT_CLSW +enum LOCALE_CLSW -@ = +<>= CommandLine::begin_group(FOUNDATION_CLSG, NULL); CommandLine::declare_switch(LOG_CLSW, L"log", 2, L"write the debugging log to include diagnostics on X"); @@ -166,11 +170,11 @@ than to exit cleanly, to make it easier to diagnose in a debugger. CommandLine::end_group(); @ Once the following has been called, it is not safe to use any of the -|foundation| facilities. It should be called on any normal exit, but not on +[[foundation]] facilities. It should be called on any normal exit, but not on an early termination due to a fatal error, as this may lead to thread safety problems. -= +<<*>>= void Foundation::end(void) { if (Log::aspect_switched_on(MEMORY_USAGE_DA)) Memory::log_statistics(); Log::close(); diff --git a/foundation-module/Chapter_1/POSIX_Platforms.nw b/foundation-module/Chapter_1/POSIX_Platforms.nw index 2ffe66b..f9c0a71 100644 --- a/foundation-module/Chapter_1/POSIX_Platforms.nw +++ b/foundation-module/Chapter_1/POSIX_Platforms.nw @@ -13,7 +13,7 @@ purposes we need here, it's simplest to divide all operating systems into two groups: the POSIX group, and Windows. This Foundation module therefore comes with two variant versions of the -|Platform::| section of code. The one you're reading compiles on a POSIX +[[Platform::]] section of code. The one you're reading compiles on a POSIX operating system, and the other one on Windows. @ Some basics that apply to all POSIX-supporting systems. @@ -38,6 +38,8 @@ operating system, and the other one on Windows. 
#define SHELL_QUOTE_CHARACTER '\'' #define INFORM_FOLDER_RELATIVE_TO_HOME "Library" +#endif + @ \section{Generic Unix.} <<*>>= #ifdef PLATFORM_UNIX @@ -47,12 +49,13 @@ Adam Thornton, and for Ubuntu, Fedora, Debian and so forth, by Philip Chimento) and also for Solaris variants: they can probably be used for any Unix-based system. +<<*>>= #define PLATFORM_STRING "unix" #define INFORM_FOLDER_RELATIVE_TO_HOME "" - -<<*>>= #include +#endif + @ \section{Linux.} These settings are used both for the Linux versions (both command-line, by Adam Thornton, and for Ubuntu, Fedora, Debian and so forth, by Philip @@ -63,6 +66,7 @@ Unix-based system. #ifdef PLATFORM_LINUX #define PLATFORM_STRING "linux" #define INFORM_FOLDER_RELATIVE_TO_HOME "" +#endif <<*>>= #include @@ -75,6 +79,7 @@ These settings are used for Nathan Summers's Android versions. #define PLATFORM_STRING "android" #define SUPPRESS_MAIN #define INFORM_FOLDER_RELATIVE_TO_HOME "" +#endif <<*>>= #include @@ -83,7 +88,7 @@ These settings are used for Nathan Summers's Android versions. When using a Unix-like system such as Cygwin on Windows, it's inevitable that paths will sometimes contain backslashes and sometimes forward slashes, meaning a folder (i.e. directory) divide in either case. So: -(a) When writing such a divider, always write |FOLDER_SEPARATOR|, a backslash; +(a) When writing such a divider, always write [[FOLDER_SEPARATOR]], a backslash; (b) When testing for such a divider, call the following. <<*>>= @@ -121,21 +126,22 @@ always be unavailable: that doesn't mean we can't run on those platforms, just that installation and use of Foundation-built tools is less convenient.) <<*>>= -ifdef PLATFORM_LINUX +#ifdef PLATFORM_LINUX void Platform::where_am_i(wchar_t *p, size_t length) { char buffer[PATH_MAX + 1]; - @; - @; + <>; + <>; } +#endif -@ On Linux, |/proc/self/exe| is a symlink to the current process's executable. +@ On Linux, [[/proc/self/exe]] is a symlink to the current process's executable. 
Follow that link to find the path. Normally when reading a symlink, one uses -|lstat()| to find the path length instead of guessing |PATH_MAX|, but the -symlinks in |/proc| are special and don't provide a length to |lstat()|. +[[lstat()]] to find the path length instead of guessing [[PATH_MAX]], but the +symlinks in [[/proc]] are special and don't provide a length to [[lstat()]]. <>= ssize_t link_len = readlink("/proc/self/exe", buffer, PATH_MAX); - if (link_len < 0) @; // unable to find + if (link_len < 0) <>; // unable to find buffer[link_len] = '\0'; @ Next, convert the obtained buffer (which is a string in the local filename @@ -144,7 +150,7 @@ string. <>= size_t convert_len = mbstowcs(p, buffer, length); - if (convert_len == (size_t)-1) @; // wouldn't fit + if (convert_len == (size_t)-1) <>; // wouldn't fit @ And now the Mac version: @@ -160,30 +166,35 @@ void Platform::where_am_i(wchar_t *p, size_t length) { uint32_t tempsize = pathsize; /* Get "a path" to the executable */ - if (_NSGetExecutablePath(relative_path, &tempsize) != 0) @; + if (_NSGetExecutablePath(relative_path, &tempsize) != 0) <>; /* Convert to canonical absolute path */ - if (realpath(relative_path, absolute_path) == NULL) @; + if (realpath(relative_path, absolute_path) == NULL) <>; /* Next, convert the obtained buffer (which is a string in the local * filename encoding, possibly multibyte) to a wide-char string. */ convert_len = mbstowcs(p, absolute_path, length); - if (convert_len == (size_t)-1) @; + if (convert_len == (size_t)-1) <>; } +#endif -@ For Unix, there's nothing we can generically do. #ifdef PLATFORM_UNIX" +@ For Unix, there's nothing we can generically do. <<*>>= +#ifdef PLATFORM_UNIX void Platform::where_am_i(wchar_t *p, size_t length) { - @; + <>; } +#endif -@ On Android, there's no real need for this. #ifdef PLATFORM_ANDROID" +@ On Android, there's no real need for this. 
<<*>>= +#ifdef PLATFORM_ANDROID void Platform::where_am_i(wchar_t *p, size_t length) { - @; + <>; } +#endif @ All of the above make use of: @@ -198,26 +209,27 @@ void Platform::where_am_i(wchar_t *p, size_t length) { int Platform::system(const char *cmd) { return system(cmd); } +#endif @ In MacOS 10.5, a new implementation of the C standard library -crippled performance of |system()| by placing it behind a global mutex, so +crippled performance of [[system()]] by placing it behind a global mutex, so that it was impossible for two cores to be calling the function at the same time. The net effect of this is that the Inform test suite, executing in Intest, ran in 1/16th speed. This issue didn't come to light until 2019, -however, because the build setting |-mmacosx-version-min=10.4| turned out -to force use of the (perfectly good) pre-10.5 library, where |system()| +however, because the build setting [[-mmacosx-version-min=10.4]] turned out +to force use of the (perfectly good) pre-10.5 library, where [[system()]] continued to run in a multi-threaded way, just as it does on Linux and most all other Unixes. The old library was eventually withdrawn by Apple in 2018, and in any case would stop working at some point in 2019-20 due to the final removal of 32-bit binary support from MacOS. -It took several days to find a pthread-safe way to reimplement |system()|. -The obvious way, using |fork()| and then running |execve()| on the child -process -- essentially the standard way to implement |system()|, if you forget +It took several days to find a pthread-safe way to reimplement [[system()]]. +The obvious way, using [[fork()]] and then running [[execve()]] on the child +process -- essentially the standard way to implement [[system()]], if you forget about signal-handling -- led to obscure and unrepeatable memory corruption bugs in Intest, with the worker threads apparently writing on each other's -memory space. Using |posix_spawn()| instead appears to work better. 
+memory space. Using [[posix_spawn()]] instead appears to work better. <<*>>= #ifdef PLATFORM_MACOS @@ -239,6 +251,7 @@ int Platform::system(const char *cmd) { } return -1; } +#endif @ \section{ Directory handling.} @@ -276,8 +289,8 @@ void Platform::closedir(void *D) { closedir(dirp); } -@ \section{ Timestamp and file size. -There are implementations of the C standard library where |time_t| has +@ \section{Timestamp and file size.} +There are implementations of the C standard library where [[time_t]] has super-weird behaviour, but on almost all POSIX systems, time 0 corresponds to midnight on 1 January 1970. All we really need is that the "never" value is one which is earlier than any possible timestamp on the files we'll @@ -349,7 +362,7 @@ to survive, given the MacOS team's current hostility to scripting; we're actually running a one-line AppleScript here. <<*>>= -#ifdef-PLATFORM_MACOS +#ifdef PLATFORM_MACOS void Platform::notification(text_stream *text, int happy) { char *sound_name = "Bell.aiff"; if (happy == FALSE) sound_name = "Submarine.aiff"; @@ -359,6 +372,7 @@ void Platform::notification(text_stream *text, int happy) { Shell::run(TEMP); DISCARD_TEXT(TEMP) } +#endif @ @@ -366,12 +380,13 @@ void Platform::notification(text_stream *text, int happy) { #ifndef PLATFORM_MACOS void Platform::notification(text_stream *text, int happy) { } +#endif @ \section{Terminal setup.} The idea of this function is that if anything needs to be done to enable the output of ANSI-standard coloured terminal output, then this function has the chance to do it; similarly, it may need to configure itself to receive console -output with the correct locale (calling |Locales::get(CONSOLE_LOCALE)| to +output with the correct locale (calling [[Locales::get(CONSOLE_LOCALE)]] to find this). On POSIX platforms, so far as we know, nothing need be done. @@ -424,23 +439,29 @@ MacOS does not support. 
<<*>>= #ifdef PLATFORM_LINUX #include +#endif -@ #ifdef PLATFORM_LINUX" -<<*>>= +@ +<<*>>= +#ifdef PLATFORM_LINUX int Platform::get_core_count(void) { int N = get_nprocs(); if (N < 1) return 1; return N; } +#endif -@ #ifdef PLATFORM_MACOS" +@ While MacOS lacks |sysinfo.h|, it does have |sysctl.h|: <<*>>= +#ifdef PLATFORM_MACOS #include +#endif -@ #ifdef PLATFORM_MACOS" +@ <<*>>= +#ifdef PLATFORM_MACOS int Platform::get_core_count(void) { int N; size_t N_size = sizeof(int); @@ -448,17 +469,21 @@ int Platform::get_core_count(void) { if (N < 1) return 1; return N; } +#endif -@ #ifdef PLATFORM_ANDROID" +@ For Android it seems prudent simply to ignore multithreading: -<<*>>= +<<*>>= +#ifdef PLATFORM_ANDROID int Platform::get_core_count(void) { return 1; } +#endif @ \section{Mutexes.} +<<*>>= #define CREATE_MUTEX(name) static pthread_mutex_t name = PTHREAD_MUTEX_INITIALIZER; #define LOCK_MUTEX(name) pthread_mutex_lock(&name);