inweb-bootstrap/Chapter_4/Programming_Languages.nw

792 lines
30 KiB
Text
Raw Normal View History

2019-02-04 22:26:45 +00:00
[Languages::] Programming Languages.
2020-04-04 19:46:43 +00:00
Defining the programming languages supported by Inweb, loading in their
definitions from files.
2019-02-04 22:26:45 +00:00
2024-03-09 12:44:19 +00:00
@ \section{Languages.}
Programming languages are identified by name: for example, [[C++]] or [[Perl]].
2019-02-04 22:26:45 +00:00
2024-03-09 12:44:19 +00:00
<<*>>=
2020-05-07 18:11:08 +00:00
/* Find the programming language called lname, reading its definition file if
   no language of that name is known yet. When no definition file can be found
   this returns NULL, unless error_if_not_found is set, in which case
   Errors::fatal_with_text is called instead. W may be NULL; if it is not, the
   web's own "Dialects" directory is tried before the default directory. */
programming_language *Languages::find_by_name(text_stream *lname, web *W,
int error_if_not_found) {
2020-04-04 19:46:43 +00:00
programming_language *pl;
2024-03-09 12:44:19 +00:00
<<If this is the name of a language already known, return that>>;
<<Read the language definition file with this name>>;
2020-04-04 19:46:43 +00:00
/* the file was found under the name lname, so it must declare that same Name */
if (Str::ne(pl->language_name, lname))
Errors::fatal_with_text(
"definition of programming language '%S' is for something else", lname);
return pl;
}
2019-02-04 22:26:45 +00:00
2024-03-09 12:44:19 +00:00
<<If this is the name of a language already known, return that>>=
2020-04-04 19:46:43 +00:00
/* look through the programming_language objects for one whose language_name
   equals lname, and return straight out of the enclosing function if found */
LOOP_OVER(pl, programming_language)
if (Str::eq(lname, pl->language_name))
return pl;
2024-03-09 12:44:19 +00:00
<<Read the language definition file with this name>>=
/* F stays NULL until one of the directories tried below contains the file */
filename *F = NULL;
if (W) {
2020-04-15 22:45:08 +00:00
/* first choice: the "Dialects" subdirectory of the web itself */
pathname *P = Pathnames::down(W->md->path_to_web, I"Dialects");
2024-03-09 12:44:19 +00:00
<<Try P>>;
}
2020-04-05 17:37:43 +00:00
/* second choice: the default languages directory (Try P does nothing if F is already set) */
pathname *P = Languages::default_directory();
2024-03-09 12:44:19 +00:00
<<Try P>>;
2020-05-07 18:11:08 +00:00
if (F == NULL) {
if (error_if_not_found)
Errors::fatal_with_text(
"unsupported programming language '%S'", lname);
return NULL;
}
2020-04-04 19:46:43 +00:00
pl = Languages::read_definition(F);
2024-03-09 12:44:19 +00:00
<<Try P>>=
/* expects a pathname P and a filename F to be in scope where this is expanded;
   only acts if no file has been found so far */
if (F == NULL) {
2020-06-27 22:03:14 +00:00
TEMPORARY_TEXT(leaf)
WRITE_TO(leaf, "%S.ildf", lname);
2020-04-15 22:45:08 +00:00
F = Filenames::in(P, leaf);
2020-06-27 22:03:14 +00:00
DISCARD_TEXT(leaf)
/* a candidate name which is not an existing file counts as not found */
if (TextFiles::exists(F) == FALSE) F = NULL;
}
2020-04-04 19:46:43 +00:00
@ I'm probably showing my age here.
2024-03-09 12:44:19 +00:00
<<*>>=
/* The default programming language is C. The lookup passes TRUE for
   error_if_not_found, so a missing definition of C is reported as an error
   by Languages::find_by_name rather than silently returning NULL. */
programming_language *Languages::default(web *W) {
	programming_language *fallback = Languages::find_by_name(I"C", W, TRUE);
	return fallback;
}
2020-04-05 17:37:43 +00:00
/* Write a list of the programming languages created so far to the output
   stream, one per line as "name: details", sorted by name using
   Languages::compare_names. */
void Languages::show(OUTPUT_STREAM) {
	WRITE("Inweb can see the following programming language definitions:\n\n");
	int count = NUMBER_CREATED(programming_language);
	/* a temporary array of pointers, so that qsort can put them in order */
	programming_language **table =
		Memory::calloc(count, (int) sizeof(programming_language *), ARRAY_SORTING_MREASON);
	int filled = 0;
	programming_language *lang;
	LOOP_OVER(lang, programming_language) {
		table[filled] = lang;
		filled++;
	}
	qsort(table, (size_t) count, sizeof(programming_language *), Languages::compare_names);
	int row = 0;
	while (row < count) {
		programming_language *entry = table[row];
		WRITE("%S: %S\n", entry->language_name, entry->language_details);
		row++;
	}
	Memory::I7_free(table, ARRAY_SORTING_MREASON, count*((int) sizeof(programming_language *)));
}
2024-03-09 12:44:19 +00:00
<<*>>=
2020-04-07 22:04:32 +00:00
/* Comparison function for qsort, as used by Languages::show. Each argument
   points to an array entry which is itself a pointer to a
   programming_language; the ordering is that given by Str::cmp_insensitive
   on the two language names. */
int Languages::compare_names(const void *ent1, const void *ent2) {
	const programming_language **left = (const programming_language **) ent1;
	const programming_language **right = (const programming_language **) ent2;
	return Str::cmp_insensitive((*left)->language_name, (*right)->language_name);
}
@ We can read every language in a directory:
2024-03-09 12:44:19 +00:00
<<*>>=
2020-04-05 17:37:43 +00:00
/* Read a language definition from every entry of the directory P whose leafname
   does not end in a folder separator, i.e. from every file rather than
   subdirectory. If P is NULL, the default languages directory is used. */
void Languages::read_definitions(pathname *P) {
	if (P == NULL) P = Languages::default_directory();
	scan_directory *D = Directories::open(P);
	/* NOTE(review): assumes Directories::open yields NULL when the directory
	   cannot be opened (e.g. it does not exist) -- confirm. In that case there
	   is nothing to scan, and D must not be passed to next/close. */
	if (D == NULL) return;
	TEMPORARY_TEXT(leafname)
	while (Directories::next(D, leafname)) {
		if (Platform::is_folder_separator(Str::get_last_char(leafname)) == FALSE) {
			filename *F = Filenames::in(P, leafname);
			Languages::read_definition(F);
		}
	}
	DISCARD_TEXT(leafname)
	Directories::close(D);
}
/* The directory searched for language definitions when no other is given:
   the "Languages" subdirectory of path_to_inweb. */
pathname *Languages::default_directory(void) {
	pathname *languages_dir = Pathnames::down(path_to_inweb, I"Languages");
	return languages_dir;
}
@ So, then, languages are defined by files which are read in, and parsed
into the following structure (one per language):
2019-02-04 22:26:45 +00:00
2024-03-09 12:44:19 +00:00
<<*>>=
2019-02-04 22:26:45 +00:00
/* One of these is created for each language definition file read. The text
   and boolean fields below are set from "Key: value" lines of that file (see
   the reader further down); text fields not mentioned in the file stay NULL. */
typedef struct programming_language {
text_stream *language_name; /* identifies it: see above */
/* then a great many fields set directly in the definition file: */
2019-02-04 22:26:45 +00:00
text_stream *file_extension; /* by default output to a file whose name has this extension */
2020-04-04 06:51:07 +00:00
text_stream *language_details; /* brief explanation of what language is */
int supports_namespaces; /* boolean, from "Supports Namespaces" */
2020-04-04 06:51:07 +00:00
text_stream *line_comment;
2020-04-05 17:37:43 +00:00
text_stream *whole_line_comment;
2020-04-04 12:07:08 +00:00
text_stream *multiline_comment_open;
text_stream *multiline_comment_close;
2020-04-04 06:51:07 +00:00
text_stream *string_literal;
text_stream *string_literal_escape;
text_stream *character_literal;
text_stream *character_literal_escape;
2020-04-04 19:46:43 +00:00
text_stream *binary_literal_prefix;
text_stream *octal_literal_prefix;
text_stream *hexadecimal_literal_prefix;
text_stream *negative_literal_prefix;
2020-04-04 12:07:08 +00:00
text_stream *shebang;
text_stream *line_marker;
text_stream *before_macro_expansion; /* from "Before Named Paragraph Expansion" */
text_stream *after_macro_expansion; /* from "After Named Paragraph Expansion" */
text_stream *start_definition;
2020-04-04 13:15:50 +00:00
text_stream *prolong_definition;
2020-04-04 12:07:08 +00:00
text_stream *end_definition;
text_stream *start_ifdef;
text_stream *end_ifdef;
text_stream *start_ifndef;
text_stream *end_ifndef;
2020-04-11 20:39:43 +00:00
wchar_t type_notation[MAX_ILDF_REGEXP_LENGTH]; /* regexp from "Type Declaration Notation" */
wchar_t function_notation[MAX_ILDF_REGEXP_LENGTH]; /* regexp from "Function Declaration Notation" */
2020-04-04 12:07:08 +00:00
int suppress_disclaimer; /* boolean, from "Suppress Disclaimer" */
int C_like; /* languages with this set have access to extra features */
2024-03-09 12:44:19 +00:00
struct linked_list *reserved_words; /* of [[reserved_word]] */
2020-04-05 22:28:05 +00:00
struct hash_table built_in_keywords; /* each reserved word is also marked here */
struct colouring_language_block *program; /* algorithm for syntax colouring */
2020-05-09 12:05:00 +00:00
struct method_set *methods;
CLASS_DEFINITION
2019-02-04 22:26:45 +00:00
} programming_language;
2024-03-09 12:44:19 +00:00
@ This is a simple one-pass compiler. The [[language_reader_state]] provides
the only state preserved as we work through line by line, except of course
2024-03-09 12:44:19 +00:00
that we are also working on the programming language it is [[defining]]. The
[[current_block]] is the braced block of colouring instructions we are
currently inside.
2020-04-04 19:46:43 +00:00
2024-03-09 12:44:19 +00:00
<<*>>=
2020-04-04 06:51:07 +00:00
/* The state carried from line to line while a definition file is being read */
typedef struct language_reader_state {
struct programming_language *defining; /* the language the file describes */
struct colouring_language_block *current_block; /* innermost open block, or NULL when outside the colouring program */
2020-04-04 06:51:07 +00:00
} language_reader_state;
/* Create a new programming_language and fill it in from the definition file F:
   every field is first given a starting value, then the file is passed through
   TextFiles::read with Languages::read_definition_line as the per-line
   callback, and finally method calls are attached. Returns the new language. */
programming_language *Languages::read_definition(filename *F) {
2020-04-04 12:07:08 +00:00
programming_language *pl = CREATE(programming_language);
2024-03-09 12:44:19 +00:00
<<Initialise the language to a plain-text state>>;
language_reader_state lrs;
lrs.defining = pl;
lrs.current_block = NULL; /* reading starts outside any colouring program */
2020-04-05 17:37:43 +00:00
TextFiles::read(F, FALSE, "can't open programming language definition file",
TRUE, Languages::read_definition_line, NULL, (void *) &lrs);
2024-03-09 12:44:19 +00:00
<<Add method calls to the language>>;
return pl;
}
2024-03-09 12:44:19 +00:00
<<Initialise the language to a plain-text state>>=
	/* Every field set by the definition file is given an explicit starting
	   value here, so that a file which omits a setting leaves a predictable
	   NULL, FALSE or empty regexp behind. */
	pl->language_name = NULL;
	pl->file_extension = NULL;
	/* language_details is printed by Languages::show, so it must not be left
	   unset when a definition file has no "Details" line */
	pl->language_details = NULL;
	pl->supports_namespaces = FALSE;
	pl->line_comment = NULL;
	pl->whole_line_comment = NULL;
	pl->multiline_comment_open = NULL;
	pl->multiline_comment_close = NULL;
	pl->string_literal = NULL;
	pl->string_literal_escape = NULL;
	pl->character_literal = NULL;
	pl->character_literal_escape = NULL;
	pl->binary_literal_prefix = NULL;
	pl->octal_literal_prefix = NULL;
	pl->hexadecimal_literal_prefix = NULL;
	pl->negative_literal_prefix = NULL;
	pl->shebang = NULL;
	pl->line_marker = NULL;
	pl->before_macro_expansion = NULL;
	pl->after_macro_expansion = NULL;
	pl->start_definition = NULL;
	pl->prolong_definition = NULL;
	pl->end_definition = NULL;
	pl->start_ifdef = NULL;
	pl->end_ifdef = NULL;
	pl->start_ifndef = NULL;
	pl->end_ifndef = NULL;
	pl->C_like = FALSE;
	pl->suppress_disclaimer = FALSE;
	/* empty regular expressions */
	pl->type_notation[0] = 0;
	pl->function_notation[0] = 0;
	/* no reserved words, no colouring program, and an empty method set */
	pl->reserved_words = NEW_LINKED_LIST(reserved_word);
	Analyser::initialise_hash_table(&(pl->built_in_keywords));
	pl->program = NULL;
	pl->methods = Methods::new_set();
@ Note that there are two levels of extra privilege: any language calling
itself C-like has functionality for function and structure definitions;
the language whose name is InC gets even more, without having to ask.
Languages have effect through their method calls, which is how those
2024-03-09 12:44:19 +00:00
extra features are provided. The call to [[ACMESupport::add_fallbacks]]
adds generic method calls to give effect to the settings in the definition.
2024-03-09 12:44:19 +00:00
<<Add method calls to the language>>=
2020-04-04 19:46:43 +00:00
/* the C-like and InC features are added first, the generic fallbacks last */
if (pl->C_like) CLike::make_c_like(pl);
if (Str::eq(pl->language_name, I"InC")) InCSupport::add_features(pl);
ACMESupport::add_fallbacks(pl);
@ So, then, the above reads the file and feeds it line by line to this:
2024-03-09 12:44:19 +00:00
<<*>>=
/* Callback passed to TextFiles::read by Languages::read_definition: handles one
   line of a definition file. v_state is the language_reader_state for the
   read; blank lines and comment lines are skipped, and everything else is
   parsed according to whether a colouring block is currently open. */
void Languages::read_definition_line(text_stream *line, text_file_position *tfp, void *v_state) {
language_reader_state *state = (language_reader_state *) v_state;
programming_language *pl = state->defining;
Str::trim_white_space(line); /* ignore trailing space */
if (Str::len(line) == 0) return; /* ignore blank lines */
2024-03-09 12:44:19 +00:00
if (Str::get_first_char(line) == '#') return; /* lines opening with [[#]] are comments */
match_results mr = Regexp::create_mr(); /* shared by whichever of the two parsers below runs */
2024-03-09 12:44:19 +00:00
if (state->current_block) <<Syntax inside a colouring program>>
else <<Syntax outside a colouring program>>;
Regexp::dispose_of(&mr);
2020-04-04 06:51:07 +00:00
}
@ Outside a colouring program, you can do three things: start a program,
declare a reserved keyword, or set a key to a value.
2024-03-09 12:44:19 +00:00
<<Syntax outside a colouring program>>=
	if (Regexp::match(&mr, line, L"colouring {")) {
		/* opening the colouring program: its outermost block has no parent */
		if (pl->program) Errors::in_text_file("duplicate colouring program", tfp);
		pl->program = Languages::new_block(NULL, WHOLE_LINE_CRULE_RUN);
		state->current_block = pl->program;
	} else if (Regexp::match(&mr, line, L"keyword (%C+) of (%c+?)")) {
		/* a reserved word in an explicitly given colour */
		Languages::reserved(pl, Languages::text(mr.exp[0], tfp, FALSE), Languages::colour(mr.exp[1], tfp), tfp);
	} else if (Regexp::match(&mr, line, L"keyword (%C+)")) {
		/* a reserved word in the default colour for such words */
		Languages::reserved(pl, Languages::text(mr.exp[0], tfp, FALSE), RESERVED_COLOUR, tfp);
	} else if (Regexp::match(&mr, line, L"(%c+) *: *(%c+?)")) {
		/* a "Key: value" setting. The value is only read by the parsers called
		   below, each of which builds its own result, so the match text is used
		   directly rather than copied first. */
		text_stream *key = mr.exp[0], *value = mr.exp[1];
		if (Str::eq(key, I"Name")) pl->language_name = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Details"))
			pl->language_details = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Extension"))
			pl->file_extension = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Line Comment"))
			pl->line_comment = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Whole Line Comment"))
			pl->whole_line_comment = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Multiline Comment Open"))
			pl->multiline_comment_open = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Multiline Comment Close"))
			pl->multiline_comment_close = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"String Literal"))
			pl->string_literal = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"String Literal Escape"))
			pl->string_literal_escape = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Character Literal"))
			pl->character_literal = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Character Literal Escape"))
			pl->character_literal_escape = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Binary Literal Prefix"))
			pl->binary_literal_prefix = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Octal Literal Prefix"))
			pl->octal_literal_prefix = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Hexadecimal Literal Prefix"))
			pl->hexadecimal_literal_prefix = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Negative Literal Prefix"))
			pl->negative_literal_prefix = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Shebang"))
			pl->shebang = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Line Marker"))
			pl->line_marker = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Before Named Paragraph Expansion"))
			pl->before_macro_expansion = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"After Named Paragraph Expansion"))
			pl->after_macro_expansion = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Start Definition"))
			pl->start_definition = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Prolong Definition"))
			pl->prolong_definition = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"End Definition"))
			pl->end_definition = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Start Ifdef"))
			pl->start_ifdef = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"Start Ifndef"))
			pl->start_ifndef = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"End Ifdef"))
			pl->end_ifdef = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"End Ifndef"))
			pl->end_ifndef = Languages::text(value, tfp, TRUE);
		else if (Str::eq(key, I"C-Like"))
			pl->C_like = Languages::boolean(value, tfp);
		else if (Str::eq(key, I"Suppress Disclaimer"))
			pl->suppress_disclaimer = Languages::boolean(value, tfp);
		else if (Str::eq(key, I"Supports Namespaces"))
			pl->supports_namespaces = Languages::boolean(value, tfp);
		else if (Str::eq(key, I"Function Declaration Notation"))
			Languages::regexp(pl->function_notation, value, tfp);
		else if (Str::eq(key, I"Type Declaration Notation"))
			Languages::regexp(pl->type_notation, value, tfp);
		else {
			Errors::in_text_file("unknown property name before ':'", tfp);
		}
	} else {
		Errors::in_text_file("line in language definition illegible", tfp);
	}
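@ Inside a colouring program, you can close the current block (which may be
the entire program); open a new block to apply to each character (optionally
only those in a given set), to runs of a given colour, to instances of a
given text, or to matches of a regular expression or its bracketed
subexpressions; or give an if-X-then-Y rule: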
2020-04-04 19:46:43 +00:00
2024-03-09 12:44:19 +00:00
<<Syntax inside a colouring program>>=
/* a lone close brace ends the current block, returning to its parent; for the
   outermost block the parent is NULL, which ends the colouring program */
if (Str::eq(line, I"}")) {
state->current_block = state->current_block->parent;
} else if (Regexp::match(&mr, line, L"characters {")) {
/* each block-opening line makes an unconditional rule whose conclusion is to
   execute a new sub-block, which then becomes the current block */
colouring_rule *rule = Languages::new_rule(state->current_block);
rule->execute_block =
Languages::new_block(state->current_block, CHARACTERS_CRULE_RUN);
state->current_block = rule->execute_block;
} else if (Regexp::match(&mr, line, L"characters in (%c+) {")) {
colouring_rule *rule = Languages::new_rule(state->current_block);
rule->execute_block =
Languages::new_block(state->current_block, CHARACTERS_IN_CRULE_RUN);
rule->execute_block->char_set = Languages::text(mr.exp[0], tfp, FALSE);
state->current_block = rule->execute_block;
} else if (Regexp::match(&mr, line, L"runs of (%c+) {")) {
colouring_rule *rule = Languages::new_rule(state->current_block);
/* the run is either the word "unquoted" or a !colour name */
wchar_t r = UNQUOTED_COLOUR;
if (Str::ne(mr.exp[0], I"unquoted")) r = Languages::colour(mr.exp[0], tfp);
rule->execute_block = Languages::new_block(state->current_block, (int) r);
state->current_block = rule->execute_block;
2020-04-05 17:37:43 +00:00
} else if (Regexp::match(&mr, line, L"instances of (%c+) {")) {
colouring_rule *rule = Languages::new_rule(state->current_block);
rule->execute_block = Languages::new_block(state->current_block, INSTANCES_CRULE_RUN);
2020-04-06 11:26:10 +00:00
rule->execute_block->run_instance = Languages::text(mr.exp[0], tfp, FALSE);
2020-04-05 17:37:43 +00:00
state->current_block = rule->execute_block;
} else if (Regexp::match(&mr, line, L"matches of (%c+) {")) {
colouring_rule *rule = Languages::new_rule(state->current_block);
rule->execute_block = Languages::new_block(state->current_block, MATCHES_CRULE_RUN);
Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
state->current_block = rule->execute_block;
} else if (Regexp::match(&mr, line, L"brackets in (%c+) {")) {
colouring_rule *rule = Languages::new_rule(state->current_block);
rule->execute_block = Languages::new_block(state->current_block, BRACKETS_CRULE_RUN);
Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
state->current_block = rule->execute_block;
} else {
2020-04-05 17:37:43 +00:00
/* anything else must be a rule "premiss => conclusion". Scan for "=>"
   outside of double-quoted text, skipping the character after a backslash
   inside quotes; if there are several, the last one found is used. */
int at = -1, quoted = FALSE;
for (int i=0; i<Str::len(line)-1; i++) {
if (Str::get_at(line, i) == '"') quoted = quoted?FALSE:TRUE;
if ((quoted) && (Str::get_at(line, i) == '\\')) i++;
if ((quoted == FALSE) &&
(Str::get_at(line, i) == '=') && (Str::get_at(line, i+1) == '>')) at = i;
}
if (at >= 0) {
2020-06-27 22:03:14 +00:00
TEMPORARY_TEXT(premiss)
TEMPORARY_TEXT(conclusion)
2020-04-05 17:37:43 +00:00
/* split the line either side of the two-character "=>" */
Str::substr(premiss, Str::start(line), Str::at(line, at));
Str::substr(conclusion, Str::at(line, at+2), Str::end(line));
Languages::parse_rule(state, premiss, conclusion, tfp);
2020-06-27 22:03:14 +00:00
DISCARD_TEXT(conclusion)
DISCARD_TEXT(premiss)
2020-04-05 17:37:43 +00:00
} else {
Errors::in_text_file("line in colouring block illegible", tfp);
}
}
2020-04-04 19:46:43 +00:00
2024-03-09 12:44:19 +00:00
@ \section{Blocks.}
These are code blocks of colouring instructions. A block whose [[parent]] is [[NULL]]
represents a complete program.
2024-03-09 12:44:19 +00:00
<<*>>=
/* Possible values of the run field of a colouring_language_block, other than
   a colour. The colour codes defined in this file are character constants,
   so these negative values cannot be mistaken for one. */
#define WHOLE_LINE_CRULE_RUN -1 /* This block applies to the whole snippet being coloured */
#define CHARACTERS_CRULE_RUN -2 /* This block applies to each character in turn */
#define CHARACTERS_IN_CRULE_RUN -3 /* This block applies to each character from a set in turn */
#define INSTANCES_CRULE_RUN -4 /* This block applies to each instance in turn */
#define MATCHES_CRULE_RUN -5 /* This block applies to each match against a regexp in turn */
#define BRACKETS_CRULE_RUN -6 /* This block applies to bracketed subexpressions in a regexp */
2020-04-04 19:46:43 +00:00
2024-03-09 12:44:19 +00:00
<<*>>=
/* A braced block of colouring rules, forming a tree through the parent links */
typedef struct colouring_language_block {
2024-03-09 12:44:19 +00:00
struct linked_list *rules; /* of [[colouring_rule]] */
struct colouring_language_block *parent; /* or [[NULL]] for the topmost one */
int run; /* one of the [[*_CRULE_RUN]] values, or else a colour */
struct text_stream *run_instance; /* used only for [[INSTANCES_CRULE_RUN]] */
struct text_stream *char_set; /* used only for [[CHARACTERS_IN_CRULE_RUN]] */
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* used for [[MATCHES_CRULE_RUN]], [[BRACKETS_CRULE_RUN]] */
/* workspace during painting */
struct match_results mr; /* of a regular expression */
2020-05-09 12:05:00 +00:00
CLASS_DEFINITION
} colouring_language_block;
2020-04-04 19:46:43 +00:00
2024-03-09 12:44:19 +00:00
<<*>>=
/* Create a block of colouring rules lying inside the block within (NULL for
   the outermost block of a program), to be run over r, which is one of the
   *_CRULE_RUN values or else a colour. The new block has no rules yet, and
   its instance text, character set and regexp are all empty. */
colouring_language_block *Languages::new_block(colouring_language_block *within, int r) {
	colouring_language_block *fresh = CREATE(colouring_language_block);
	/* an empty rule list, and a clean regexp workspace */
	fresh->rules = NEW_LINKED_LIST(colouring_rule);
	fresh->mr = Regexp::create_mr();
	/* where the block sits, and what it applies to */
	fresh->parent = within;
	fresh->run = r;
	/* details which only some kinds of run need: the caller fills these in */
	fresh->match_regexp_text[0] = 0;
	fresh->char_set = NULL;
	fresh->run_instance = NULL;
	return fresh;
}
2024-03-09 12:44:19 +00:00
@ \section{Colouring Rules.}
Each individual rule has the form: if a premiss, then a conclusion. It will be
applied to a snippet of text, and the premiss can test that, together with a
little context before it (where available).
Note that rules can be unconditional, in that the premiss always passes.
2024-03-09 12:44:19 +00:00
<<*>>=
/* Values for the match_prefix field of a colouring_rule. Note that the suffix
   forms are stored in that same field, despite its name. */
#define NOT_A_RULE_PREFIX 1 /* this isn't a prefix rule */
#define UNSPACED_RULE_PREFIX 2 /* for [[prefix P]] */
#define SPACED_RULE_PREFIX 3 /* for [[spaced prefix P]] */
#define OPTIONALLY_SPACED_RULE_PREFIX 4 /* for [[optionally spaced prefix P]] */
#define UNSPACED_RULE_SUFFIX 5 /* for [[suffix P]] */
#define SPACED_RULE_SUFFIX 6 /* for [[spaced suffix P]] */
#define OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for [[optionally spaced suffix P]] */
2024-03-09 12:44:19 +00:00
/* size, in wide characters, of every buffer holding a translated regular expression */
#define MAX_ILDF_REGEXP_LENGTH 64
2024-03-09 12:44:19 +00:00
<<*>>=
/* A single rule: if the premiss holds of a snippet, then carry out the conclusion */
typedef struct colouring_rule {
/* the premiss: */
2024-03-09 12:44:19 +00:00
int sense; /* [[FALSE]] to negate the condition */
wchar_t match_colour; /* for [[coloured C]], or else [[NOT_A_COLOUR]] */
wchar_t match_keyword_of_colour; /* for [[keyword C]], or else [[NOT_A_COLOUR]] */
struct text_stream *match_text; /* or length 0 to mean "anything" */
2024-03-09 12:44:19 +00:00
int match_prefix; /* one of the [[*_RULE_PREFIX]] values above */
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* for [[matching /regexp/]], or else empty */
2024-03-09 12:44:19 +00:00
int number; /* for [[number N]] rules; 0 for others */
int number_of; /* for [[number N of M]] rules; 0 for others */
2020-04-04 06:51:07 +00:00
/* the conclusion: */
2024-03-09 12:44:19 +00:00
struct colouring_language_block *execute_block; /* or [[NULL]], in which case... */
2020-07-05 14:33:00 +00:00
wchar_t set_to_colour; /* ...paint the snippet in this colour */
wchar_t set_prefix_to_colour; /* ...also paint this (same for suffix) */
2020-04-05 22:28:05 +00:00
int debug; /* ...or print debugging text to console */
/* workspace during painting */
int fix_position; /* where the prefix or suffix started */
struct match_results mr; /* of a regular expression */
2020-05-09 12:05:00 +00:00
CLASS_DEFINITION
} colouring_rule;
2020-04-04 06:51:07 +00:00
2024-03-09 12:44:19 +00:00
<<*>>=
/* Create a rule and append it to the rules of the block within, which must not
   be NULL. The rule starts out unconditional and with no effect: a positive
   sense, nothing to match, no colour to paint, no block to execute and no
   debugging. */
colouring_rule *Languages::new_rule(colouring_language_block *within) {
	if (within == NULL) internal_error("rule outside block");
	colouring_rule *fresh = CREATE(colouring_rule);
	ADD_TO_LINKED_LIST(fresh, colouring_rule, within->rules);

	/* the premiss: matches anything */
	fresh->sense = TRUE;
	fresh->match_text = NULL;
	fresh->match_regexp_text[0] = 0;
	fresh->match_prefix = NOT_A_RULE_PREFIX;
	fresh->match_colour = NOT_A_COLOUR;
	fresh->match_keyword_of_colour = NOT_A_COLOUR;
	fresh->number = 0;
	fresh->number_of = 0;

	/* the conclusion: does nothing */
	fresh->execute_block = NULL;
	fresh->set_to_colour = NOT_A_COLOUR;
	fresh->set_prefix_to_colour = NOT_A_COLOUR;
	fresh->debug = FALSE;

	/* workspace used during painting */
	fresh->fix_position = 0;
	fresh->mr = Regexp::create_mr();
	return fresh;
}
2024-03-09 12:44:19 +00:00
<<*>>=
/* Parse a rule "premiss => action" found inside a colouring block, adding it
   to the current block of state. Both texts are trimmed of white space in
   place, and premiss may be further rewritten as "not" is stripped from it.
   Problems are reported against the file position tfp. */
void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
text_stream *action, text_file_position *tfp) {
match_results mr = Regexp::create_mr();
2020-04-04 19:46:43 +00:00
/* the rule is added to the block before parsing, so it exists even if parsing reports errors */
colouring_rule *rule = Languages::new_rule(state->current_block);
2020-04-04 06:51:07 +00:00
Str::trim_white_space(premiss); Str::trim_white_space(action);
2024-03-09 12:44:19 +00:00
<<Parse the premiss>>;
<<Parse the conclusion>>;
Regexp::dispose_of(&mr);
}
2024-03-09 12:44:19 +00:00
<<Parse the premiss>>=
/* each leading "not" flips the sense of the rule and is then removed, so
   that a doubled "not not" cancels out */
while (Regexp::match(&mr, premiss, L"not (%c+)")) {
rule->sense = (rule->sense)?FALSE:TRUE;
Str::clear(premiss); Str::copy(premiss, mr.exp[0]);
}
/* then exactly one of the following forms applies; an empty premiss matches
   none of them and leaves the rule unconditional */
if (Regexp::match(&mr, premiss, L"number (%d+)")) {
rule->number = Str::atoi(mr.exp[0], 0);
} else if (Regexp::match(&mr, premiss, L"number (%d+) of (%d+)")) {
rule->number = Str::atoi(mr.exp[0], 0);
rule->number_of = Str::atoi(mr.exp[1], 0);
} else if (Regexp::match(&mr, premiss, L"keyword of (%c+)")) {
rule->match_keyword_of_colour = Languages::colour(mr.exp[0], tfp);
2020-04-05 17:37:43 +00:00
} else if (Regexp::match(&mr, premiss, L"keyword")) {
Errors::in_text_file("ambiguous: make it keyword of !reserved or \"keyword\"", tfp);
} else if (Regexp::match(&mr, premiss, L"prefix (%c+)")) {
rule->match_prefix = UNSPACED_RULE_PREFIX;
2020-04-06 11:26:10 +00:00
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
} else if (Regexp::match(&mr, premiss, L"matching (%c+)")) {
Languages::regexp(rule->match_regexp_text, mr.exp[0], tfp);
} else if (Regexp::match(&mr, premiss, L"spaced prefix (%c+)")) {
rule->match_prefix = SPACED_RULE_PREFIX;
2020-04-06 11:26:10 +00:00
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
} else if (Regexp::match(&mr, premiss, L"optionally spaced prefix (%c+)")) {
rule->match_prefix = OPTIONALLY_SPACED_RULE_PREFIX;
2020-04-06 11:26:10 +00:00
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
2020-04-05 17:37:43 +00:00
} else if (Regexp::match(&mr, premiss, L"suffix (%c+)")) {
/* suffix rules reuse the match_prefix field */
rule->match_prefix = UNSPACED_RULE_SUFFIX;
2020-04-06 11:26:10 +00:00
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
2020-04-05 17:37:43 +00:00
} else if (Regexp::match(&mr, premiss, L"spaced suffix (%c+)")) {
rule->match_prefix = SPACED_RULE_SUFFIX;
2020-04-06 11:26:10 +00:00
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
2020-04-05 17:37:43 +00:00
} else if (Regexp::match(&mr, premiss, L"optionally spaced suffix (%c+)")) {
rule->match_prefix = OPTIONALLY_SPACED_RULE_SUFFIX;
2020-04-06 11:26:10 +00:00
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
} else if (Regexp::match(&mr, premiss, L"coloured (%c+)")) {
rule->match_colour = Languages::colour(mr.exp[0], tfp);
2020-04-05 17:37:43 +00:00
} else if (Str::len(premiss) > 0) {
/* anything else is literal text to be matched */
2020-04-06 11:26:10 +00:00
rule->match_text = Languages::text(premiss, tfp, FALSE);
2020-04-04 06:51:07 +00:00
}
2024-03-09 12:44:19 +00:00
<<Parse the conclusion>>=
2020-04-04 06:51:07 +00:00
/* an open brace means the conclusion is a whole new block of rules, which
   becomes the current block until its closing brace is read */
if (Str::eq(action, I"{")) {
rule->execute_block =
Languages::new_block(state->current_block, WHOLE_LINE_CRULE_RUN);
state->current_block = rule->execute_block;
2020-04-05 22:28:05 +00:00
} else if (Regexp::match(&mr, action, L"(!%c+) on prefix")) {
rule->set_prefix_to_colour = Languages::colour(mr.exp[0], tfp);
} else if (Regexp::match(&mr, action, L"(!%c+) on suffix")) {
/* the same field serves for both prefix and suffix */
rule->set_prefix_to_colour = Languages::colour(mr.exp[0], tfp);
} else if (Regexp::match(&mr, action, L"(!%c+) on both")) {
/* paint the snippet and its prefix or suffix alike */
rule->set_to_colour = Languages::colour(mr.exp[0], tfp);
rule->set_prefix_to_colour = rule->set_to_colour;
2020-04-04 06:51:07 +00:00
} else if (Str::get_first_char(action) == '!') {
rule->set_to_colour = Languages::colour(action, tfp);
2020-04-05 22:28:05 +00:00
} else if (Str::eq(action, I"debug")) {
rule->debug = TRUE;
2020-04-04 06:51:07 +00:00
} else {
Errors::in_text_file("action after '=>' illegible", tfp);
}
2024-03-09 12:44:19 +00:00
@ \section{Reserved words.}
Note that these can come in any colour, though usually it's [[!reserved]].
2024-03-09 12:44:19 +00:00
<<*>>=
/* A word declared by a "keyword" line of a language definition file */
typedef struct reserved_word {
struct text_stream *word; /* the word itself: a private copy made by Languages::reserved */
int colour; /* one of the *_COLOUR codes, stored as an int */
2020-05-09 12:05:00 +00:00
CLASS_DEFINITION
} reserved_word;
/* Declare W as a reserved word of the language pl, to be coloured C. A copy
   of W is kept, so the caller's text need not survive. If W has been declared
   already, an error is reported at tfp for each earlier declaration, but the
   new declaration is recorded all the same. Returns the new reserved_word. */
reserved_word *Languages::reserved(programming_language *pl, text_stream *W, wchar_t C,
	text_file_position *tfp) {
	int colour_code = (int) C;
	reserved_word *earlier;
	LOOP_OVER_LINKED_LIST(earlier, reserved_word, pl->reserved_words) {
		if (Str::eq(earlier->word, W))
			Errors::in_text_file("duplicate reserved word", tfp);
	}
	reserved_word *entry = CREATE(reserved_word);
	entry->word = Str::duplicate(W);
	entry->colour = colour_code;
	ADD_TO_LINKED_LIST(entry, reserved_word, pl->reserved_words);
	/* the word is recorded in the language's keyword hash table as well */
	Analyser::mark_reserved_word(&(pl->built_in_keywords), entry->word, colour_code);
	return entry;
}
2024-03-09 12:44:19 +00:00
@ \section{Expressions.}
Language definition files have three main types of data: colours, booleans, and
text. (Regular expressions, a fourth, come last.) Colours first. Note that two
pseudo-colours are used above which are not expressible in the syntax of this file.
2020-04-04 19:46:43 +00:00
2024-03-09 12:44:19 +00:00
<<*>>=
#define DEFINITION_COLOUR 'd'
#define FUNCTION_COLOUR 'f'
#define RESERVED_COLOUR 'r'
#define ELEMENT_COLOUR 'e'
#define IDENTIFIER_COLOUR 'i'
#define CHARACTER_COLOUR 'c'
#define CONSTANT_COLOUR 'n'
#define STRING_COLOUR 's'
#define PLAIN_COLOUR 'p'
#define EXTRACT_COLOUR 'x'
#define COMMENT_COLOUR '!'
#define NEWLINE_COLOUR '\n'
#define NOT_A_COLOUR ' '
#define UNQUOTED_COLOUR '_'
<<*>>=
/* Translate a colour name such as "!string" into its colour code. A name not
   beginning with '!', or not recognised, is reported as an error at tfp, and
   PLAIN_COLOUR is returned in its place. */
wchar_t Languages::colour(text_stream *T, text_file_position *tfp) {
	if (Str::get_first_char(T) != '!') {
		Errors::in_text_file("colour names must begin with !", tfp);
		return PLAIN_COLOUR;
	}
	/* the names are distinct, so the order of these tests is immaterial */
	if (Str::eq(T, I"!string")) return STRING_COLOUR;
	if (Str::eq(T, I"!function")) return FUNCTION_COLOUR;
	if (Str::eq(T, I"!definition")) return DEFINITION_COLOUR;
	if (Str::eq(T, I"!reserved")) return RESERVED_COLOUR;
	if (Str::eq(T, I"!element")) return ELEMENT_COLOUR;
	if (Str::eq(T, I"!identifier")) return IDENTIFIER_COLOUR;
	if (Str::eq(T, I"!character")) return CHARACTER_COLOUR;
	if (Str::eq(T, I"!constant")) return CONSTANT_COLOUR;
	if (Str::eq(T, I"!plain")) return PLAIN_COLOUR;
	if (Str::eq(T, I"!extract")) return EXTRACT_COLOUR;
	if (Str::eq(T, I"!comment")) return COMMENT_COLOUR;
	Errors::in_text_file("no such !colour", tfp);
	return PLAIN_COLOUR;
}
2024-03-09 12:44:19 +00:00
@ A boolean must be written as [[true]] or [[false]].
2024-03-09 12:44:19 +00:00
<<*>>=
2020-04-04 12:07:08 +00:00
/* Translate the text "true" or "false" into TRUE or FALSE. Anything else is
   reported as an error at tfp, and is read as FALSE. */
int Languages::boolean(text_stream *T, text_file_position *tfp) {
	if (Str::eq(T, I"true")) return TRUE;
	if (Str::eq(T, I"false")) return FALSE;
	Errors::in_text_file("must be true or false", tfp);
	return FALSE;
}
2019-02-04 22:26:45 +00:00
2024-03-09 12:44:19 +00:00
@ In text, [[\n]] represents a newline, [[\s]] a space and [[\t]] a tab. Spaces
can be given in the ordinary way inside a text in any case. [[\\]] is a
literal backslash and, inside double-quoted text only, [[\"]] is a literal
double-quotation mark.
2024-03-09 12:44:19 +00:00
<<*>>=
2020-04-06 11:26:10 +00:00
/* Translate the text T, as written in a definition file, into a new text
   stream holding what it means. T is either quoted (it has at least two
   characters, and begins and ends with a double-quotation mark) or else a
   bareword. The escapes \n, \s, \t and \\ are expanded in both; \" only in
   quoted text. Misuse of quotation marks, a bareword containing '/' or
   beginning with '!', a bareword containing a space when allow is FALSE, and
   a bareword which is a reserved word of the definition syntax are all
   reported as errors at tfp, but a text is returned regardless. */
text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow) {
	text_stream *V = Str::new();
	if (Str::len(T) == 0) return V;

	int bareword = TRUE, spaced = FALSE, from = 0, to = Str::len(T)-1;
	if ((to > from) &&
		(Str::get_at(T, from) == '"') && (Str::get_at(T, to) == '"')) {
		/* quoted: the content is what lies between the two marks */
		bareword = FALSE; from++; to--;
	}

	for (int i=from; i<=to; i++) {
		wchar_t c = Str::get_at(T, i);
		if (c == ' ') spaced = TRUE;
		if (c == '\\') {
			/* a recognised escape is replaced by what it stands for, and uses
			   up the following character; any other backslash is kept as it is */
			wchar_t next = Str::get_at(T, i+1);
			wchar_t decoded = 0;
			if (next == 'n') decoded = '\n';
			else if (next == 's') decoded = ' ';
			else if (next == 't') decoded = '\t';
			else if (next == '\\') decoded = '\\';
			else if ((bareword == FALSE) && (next == '"')) decoded = '"';
			if (decoded) {
				PUT_TO(V, decoded);
				i++;
			} else {
				PUT_TO(V, c);
			}
			continue;
		}
		/* characters which are reported as errors are not copied to the result */
		if (c == '"') {
			if (bareword) {
				Errors::in_text_file(
					"double-quotation marks can only be used in quoted strings", tfp);
			} else {
				Errors::in_text_file(
					"backslash needed before internal double-quotation mark", tfp);
			}
		} else if ((bareword) && (c == '!') && (i == from)) {
			Errors::in_text_file(
				"a literal starting with ! must be in double-quotation marks", tfp);
		} else if ((bareword) && (c == '/')) {
			Errors::in_text_file(
				"forward slashes can only be used in quoted strings", tfp);
		} else {
			PUT_TO(V, c);
		}
	}

	if (bareword == FALSE) return V;

	if ((spaced) && (allow == FALSE)) {
		TEMPORARY_TEXT(err)
		WRITE_TO(err, "'%S' seems to be literal text, but if so it needs double-quotation marks", T);
		Errors::in_text_file_S(err, tfp);
		DISCARD_TEXT(err)
	}

	/* words with a meaning in the definition syntax must be quoted to be used as text */
	int rw = FALSE;
	if ((Str::eq(V, I"both")) || (Str::eq(V, I"brackets")) ||
		(Str::eq(V, I"characters")) || (Str::eq(V, I"coloured")) ||
		(Str::eq(V, I"colouring")) || (Str::eq(V, I"debug")) ||
		(Str::eq(V, I"false")) || (Str::eq(V, I"in")) ||
		(Str::eq(V, I"instances")) || (Str::eq(V, I"keyword")) ||
		(Str::eq(V, I"matches")) || (Str::eq(V, I"matching")) ||
		(Str::eq(V, I"not")) || (Str::eq(V, I"of")) ||
		(Str::eq(V, I"on")) || (Str::eq(V, I"optionally")) ||
		(Str::eq(V, I"prefix")) || (Str::eq(V, I"runs")) ||
		(Str::eq(V, I"spaced")) || (Str::eq(V, I"suffix")) ||
		(Str::eq(V, I"true")) || (Str::eq(V, I"unquoted"))) rw = TRUE;
	if (rw) {
		TEMPORARY_TEXT(err)
		WRITE_TO(err, "'%S' is a reserved word, so you should put it in double-quotation marks", V);
		Errors::in_text_file_S(err, tfp);
		DISCARD_TEXT(err)
	}
	return V;
}
@ And regular expressions.
2024-03-09 12:44:19 +00:00
<<*>>=
/* Translate the regular expression T, written between slashes in a definition
   file, into the %-escaped notation built by Languages::add_escape_to_regexp,
   storing the result in the buffer write_to. In the translation, \\ becomes a
   literal backslash, \s a space, \S becomes %C, \" becomes %q, and any other
   \x becomes %x; an unescaped . becomes %c and % becomes %%. Errors are
   reported at tfp if T is not enclosed in slashes, or if the translation
   reaches MAX_ILDF_REGEXP_LENGTH characters. An empty T leaves the buffer
   empty and is not an error. */
void Languages::regexp(wchar_t *write_to, text_stream *T, text_file_position *tfp) {
	if (write_to == NULL) internal_error("no buffer");
	write_to[0] = 0;
	if (Str::len(T) == 0) return;

	int first = 0, last = Str::len(T)-1, x = 0;
	int in_slashes = FALSE;
	if ((last > first) &&
		(Str::get_at(T, first) == '/') && (Str::get_at(T, last) == '/')) in_slashes = TRUE;

	if (in_slashes == FALSE) {
		Errors::in_text_file(
			"the expression to match must be in slashes '/'", tfp);
	} else {
		/* translate what lies strictly between the two slashes */
		for (int i=first+1; i<=last-1; i++) {
			wchar_t c = Str::get_at(T, i);
			if (c == '\\') {
				/* a backslash always uses up the character after it */
				wchar_t w = Str::get_at(T, i+1);
				switch (w) {
					case '\\': x = Languages::add_to_regexp(write_to, x, w); break;
					case 's': x = Languages::add_to_regexp(write_to, x, ' '); break;
					case 'S': x = Languages::add_escape_to_regexp(write_to, x, 'C'); break;
					case '"': x = Languages::add_escape_to_regexp(write_to, x, 'q'); break;
					default: /* including \d and \t, which become %d and %t */
						x = Languages::add_escape_to_regexp(write_to, x, w); break;
				}
				i++;
			} else if (c == '.') {
				x = Languages::add_escape_to_regexp(write_to, x, 'c');
			} else if (c == '%') {
				x = Languages::add_escape_to_regexp(write_to, x, '%');
			} else {
				x = Languages::add_to_regexp(write_to, x, c);
			}
		}
	}
	if (x >= MAX_ILDF_REGEXP_LENGTH)
		Errors::in_text_file(
			"the expression to match is too long", tfp);
}
/* Append the character c at index i of the regexp buffer write_to, which has
   room for MAX_ILDF_REGEXP_LENGTH wide characters, and return the index for
   the next character. The buffer is kept null-terminated after every call:
   at most MAX_ILDF_REGEXP_LENGTH-1 characters are stored, the final slot
   being kept for the terminator. A character which does not fit is dropped,
   but the index returned still advances to MAX_ILDF_REGEXP_LENGTH, so that
   a caller testing for that value continues to detect an over-long
   expression exactly as before. */
int Languages::add_to_regexp(wchar_t *write_to, int i, wchar_t c) {
	if (i >= MAX_ILDF_REGEXP_LENGTH) return i; /* already overflowed: nothing to do */
	if (i < MAX_ILDF_REGEXP_LENGTH - 1) {
		write_to[i] = c;
		write_to[i+1] = 0; /* so that shorter text written over longer leaves no stale tail */
	} else {
		write_to[i] = 0; /* final slot: holds the terminator, so c is dropped */
	}
	return i + 1;
}
/* Append the two-character escape made of '%' followed by c to the regexp
   buffer write_to, starting at index i, and return the index after it. Each
   character goes through Languages::add_to_regexp, which is where the
   length limit is applied. */
int Languages::add_escape_to_regexp(wchar_t *write_to, int i, wchar_t c) {
	int after_percent = Languages::add_to_regexp(write_to, i, '%');
	return Languages::add_to_regexp(write_to, after_percent, c);
}