788 lines
30 KiB
OpenEdge ABL
788 lines
30 KiB
OpenEdge ABL
[Languages::] Programming Languages.
|
|
|
|
Defining the programming languages supported by Inweb, loading in their
|
|
definitions from files.
|
|
|
|
@h Languages.
|
|
Programming languages are identified by name: for example, |C++| or |Perl|.
|
|
|
|
@ =
|
|
/* Return the language called |lname|, reading its definition file if it has
   not been loaded yet. |W| may be |NULL|; if not, its own |Dialects| directory
   is searched first. Returns |NULL| when nothing is found and
   |error_if_not_found| is clear; otherwise a fatal error is issued. */
programming_language *Languages::find_by_name(text_stream *lname, web *W,
	int error_if_not_found) {
	/* shared by the two named paragraphs: first as a loop variable, then as
	   the newly read definition */
	programming_language *pl;

	@<If this is the name of a language already known, return that@>;
	@<Read the language definition file with this name@>;

	/* the file found must declare the same name it was looked up under */
	if (Str::ne(pl->language_name, lname)) {
		Errors::fatal_with_text(
			"definition of programming language '%S' is for something else", lname);
	}
	return pl;
}
|
|
|
|
@<If this is the name of a language already known, return that@> =
	/* exact (case-sensitive) comparison against every language created so far */
	LOOP_OVER(pl, programming_language) {
		if (Str::eq(lname, pl->language_name)) return pl;
	}
|
|
|
|
@<Read the language definition file with this name@> =
|
|
filename *F = NULL;
|
|
if (W) {
|
|
pathname *P = Pathnames::down(W->md->path_to_web, I"Dialects");
|
|
@<Try P@>;
|
|
}
|
|
pathname *P = Languages::default_directory();
|
|
@<Try P@>;
|
|
if (F == NULL) {
|
|
if (error_if_not_found)
|
|
Errors::fatal_with_text(
|
|
"unsupported programming language '%S'", lname);
|
|
return NULL;
|
|
}
|
|
pl = Languages::read_definition(F);
|
|
|
|
@<Try P@> =
	/* only look in |P| if no earlier directory has already supplied the file */
	if (F == NULL) {
		TEMPORARY_TEXT(leaf)
		WRITE_TO(leaf, "%S.ildf", lname);
		filename *candidate = Filenames::in(P, leaf);
		DISCARD_TEXT(leaf)
		/* keep the candidate only if such a file actually exists */
		if (TextFiles::exists(candidate)) F = candidate;
	}
|
|
|
|
@ I'm probably showing my age here.
|
|
|
|
=
|
|
/* The language assumed when none is specified: C. Not finding it is fatal. */
programming_language *Languages::default(web *W) {
	programming_language *fallback = Languages::find_by_name(I"C", W, TRUE);
	return fallback;
}
|
|
|
|
/* Print the name and details of every language created so far, sorted by
   name without regard to case. */
void Languages::show(OUTPUT_STREAM) {
	WRITE("Inweb can see the following programming language definitions:\n\n");

	/* gather pointers to all of the languages into a temporary array */
	int total = NUMBER_CREATED(programming_language);
	programming_language **table =
		Memory::calloc(total, (int) sizeof(programming_language *), ARRAY_SORTING_MREASON);
	int filled = 0;
	programming_language *each;
	LOOP_OVER(each, programming_language) {
		table[filled] = each;
		filled++;
	}

	qsort(table, (size_t) total, sizeof(programming_language *), Languages::compare_names);

	int row = 0;
	while (row < total) {
		programming_language *entry = table[row];
		WRITE("%S: %S\n", entry->language_name, entry->language_details);
		row++;
	}

	/* the size passed here must match the size originally allocated */
	Memory::I7_free(table, ARRAY_SORTING_MREASON, total*((int) sizeof(programming_language *)));
}
|
|
|
|
@ =
|
|
/* A |qsort| comparison function: each argument points to an array entry, which
   is itself a pointer to a language. Ordering is by name, ignoring case. */
int Languages::compare_names(const void *ent1, const void *ent2) {
	const programming_language *first = *((const programming_language **) ent1);
	const programming_language *second = *((const programming_language **) ent2);
	return Str::cmp_insensitive(first->language_name, second->language_name);
}
|
|
|
|
@ We can read every language in a directory:
|
|
|
|
=
|
|
/* Read a definition from every file in the directory |P|, or in the default
   directory if |P| is |NULL|. */
void Languages::read_definitions(pathname *P) {
	if (P == NULL) P = Languages::default_directory();
	scan_directory *D = Directories::open(P);
	TEMPORARY_TEXT(leafname)
	while (Directories::next(D, leafname)) {
		/* skip entries whose names end in a folder separator (presumably
		   subdirectories) */
		if (Platform::is_folder_separator(Str::get_last_char(leafname))) continue;
		Languages::read_definition(Filenames::in(P, leafname));
	}
	DISCARD_TEXT(leafname)
	Directories::close(D);
}
|
|
|
|
/* The |Languages| subdirectory of the Inweb installation. */
pathname *Languages::default_directory(void) {
	pathname *built_in = Pathnames::down(path_to_inweb, I"Languages");
	return built_in;
}
|
|
|
|
@ So, then, languages are defined by files which are read in, and parsed
|
|
into the following structure (one per language):
|
|
|
|
=
|
|
/* One of these is created for each language definition file read. The quoted
   names in the comments below are the keys which set each field in that file. */
typedef struct programming_language {
|
|
text_stream *language_name; /* identifies it: see above; set by |Name| */
|
|
|
|
/* then a great many fields set directly in the definition file: */
|
|
text_stream *file_extension; /* by default output to a file whose name has this extension; set by |Extension| */
|
|
text_stream *language_details; /* brief explanation of what language is; set by |Details| */
|
|
int supports_namespaces; /* boolean; set by |Supports Namespaces| */
|
|
text_stream *line_comment; /* set by |Line Comment| */
|
|
text_stream *whole_line_comment; /* set by |Whole Line Comment| */
|
|
text_stream *multiline_comment_open; /* set by |Multiline Comment Open| */
|
|
text_stream *multiline_comment_close; /* set by |Multiline Comment Close| */
|
|
text_stream *string_literal; /* set by |String Literal| */
|
|
text_stream *string_literal_escape; /* set by |String Literal Escape| */
|
|
text_stream *character_literal; /* set by |Character Literal| */
|
|
text_stream *character_literal_escape; /* set by |Character Literal Escape| */
|
|
text_stream *binary_literal_prefix; /* set by |Binary Literal Prefix| */
|
|
text_stream *octal_literal_prefix; /* set by |Octal Literal Prefix| */
|
|
text_stream *hexadecimal_literal_prefix; /* set by |Hexadecimal Literal Prefix| */
|
|
text_stream *negative_literal_prefix; /* set by |Negative Literal Prefix| */
|
|
text_stream *shebang; /* set by |Shebang| */
|
|
text_stream *line_marker; /* set by |Line Marker| */
|
|
text_stream *before_macro_expansion; /* set by |Before Named Paragraph Expansion| */
|
|
text_stream *after_macro_expansion; /* set by |After Named Paragraph Expansion| */
|
|
text_stream *start_definition; /* set by |Start Definition| */
|
|
text_stream *prolong_definition; /* set by |Prolong Definition| */
|
|
text_stream *end_definition; /* set by |End Definition| */
|
|
text_stream *start_ifdef; /* set by |Start Ifdef| */
|
|
text_stream *end_ifdef; /* set by |End Ifdef| */
|
|
text_stream *start_ifndef; /* set by |Start Ifndef| */
|
|
text_stream *end_ifndef; /* set by |End Ifndef| */
|
|
wchar_t type_notation[MAX_ILDF_REGEXP_LENGTH]; /* translated regexp; set by |Type Declaration Notation| */
|
|
wchar_t function_notation[MAX_ILDF_REGEXP_LENGTH]; /* translated regexp; set by |Function Declaration Notation| */
|
|
|
|
int suppress_disclaimer; /* boolean; set by |Suppress Disclaimer| */
|
|
int C_like; /* languages with this set have access to extra features; set by |C-Like| */
|
|
|
|
struct linked_list *reserved_words; /* of |reserved_word| */
|
|
struct hash_table built_in_keywords; /* each reserved word is also marked here: see |Languages::reserved| */
|
|
struct colouring_language_block *program; /* algorithm for syntax colouring; |NULL| unless a |colouring {| block is read */
|
|
struct method_set *methods; /* created empty, then added to once the file has been read */
|
|
CLASS_DEFINITION
|
|
} programming_language;
|
|
|
|
@ This is a simple one-pass compiler. The |language_reader_state| provides
|
|
the only state preserved as we work through line by line, except of course
|
|
that we are also working on the programming language it is |defining|. The
|
|
|current_block| is the braced block of colouring instructions we are
|
|
currently inside.
|
|
|
|
=
|
|
/* The state passed, as a |void *|, to |Languages::read_definition_line| for
   each line of a definition file. */
typedef struct language_reader_state {
|
|
struct programming_language *defining; /* the language this file is defining */
|
|
struct colouring_language_block *current_block; /* innermost open colouring block, or |NULL| if outside the colouring program */
|
|
} language_reader_state;
|
|
|
|
/* Create a new language and fill it in from the definition file |F|. Note that
   a new |programming_language| is always created, whatever the file contains. */
programming_language *Languages::read_definition(filename *F) {
	programming_language *pl = CREATE(programming_language);
	@<Initialise the language to a plain-text state@>;

	/* the reader begins outside of any colouring block */
	language_reader_state reader;
	reader.current_block = NULL;
	reader.defining = pl;

	TextFiles::read(F, FALSE, "can't open programming language definition file",
		TRUE, Languages::read_definition_line, NULL, (void *) &reader);

	@<Add method calls to the language@>;
	return pl;
}
|
|
|
|
@<Initialise the language to a plain-text state@> =
|
|
pl->language_name = NULL;
|
|
pl->file_extension = NULL;
|
|
pl->supports_namespaces = FALSE;
|
|
pl->line_comment = NULL;
|
|
pl->whole_line_comment = NULL;
|
|
pl->multiline_comment_open = NULL;
|
|
pl->multiline_comment_close = NULL;
|
|
pl->string_literal = NULL;
|
|
pl->string_literal_escape = NULL;
|
|
pl->character_literal = NULL;
|
|
pl->character_literal_escape = NULL;
|
|
pl->binary_literal_prefix = NULL;
|
|
pl->octal_literal_prefix = NULL;
|
|
pl->hexadecimal_literal_prefix = NULL;
|
|
pl->negative_literal_prefix = NULL;
|
|
pl->shebang = NULL;
|
|
pl->line_marker = NULL;
|
|
pl->before_macro_expansion = NULL;
|
|
pl->after_macro_expansion = NULL;
|
|
pl->start_definition = NULL;
|
|
pl->prolong_definition = NULL;
|
|
pl->end_definition = NULL;
|
|
pl->start_ifdef = NULL;
|
|
pl->end_ifdef = NULL;
|
|
pl->start_ifndef = NULL;
|
|
pl->end_ifndef = NULL;
|
|
pl->C_like = FALSE;
|
|
pl->suppress_disclaimer = FALSE;
|
|
pl->type_notation[0] = 0;
|
|
pl->function_notation[0] = 0;
|
|
|
|
pl->reserved_words = NEW_LINKED_LIST(reserved_word);
|
|
Analyser::initialise_hash_table(&(pl->built_in_keywords));
|
|
pl->program = NULL;
|
|
pl->methods = Methods::new_set();
|
|
|
|
@ Note that there are two levels of extra privilege: any language calling
|
|
itself C-like has functionality for function and structure definitions;
|
|
the language whose name is InC gets even more, without having to ask.
|
|
|
|
Languages have effect through their method calls, which is how those
|
|
extra features are provided. The call to |ACMESupport::add_fallbacks|
|
|
adds generic method calls to give effect to the settings in the definition.
|
|
|
|
@<Add method calls to the language@> =
|
|
if (pl->C_like) CLike::make_c_like(pl);
|
|
if (Str::eq(pl->language_name, I"InC")) InCSupport::add_features(pl);
|
|
ACMESupport::add_fallbacks(pl);
|
|
|
|
@ So, then, the above reads the file and feeds it line by line to this:
|
|
|
|
=
|
|
/* Called once for each line of a definition file; |v_state| is really a
   pointer to the |language_reader_state| for that file. */
void Languages::read_definition_line(text_stream *line, text_file_position *tfp, void *v_state) {
	language_reader_state *state = (language_reader_state *) v_state;
	programming_language *pl = state->defining;

	Str::trim_white_space(line); /* ignore trailing space */
	/* blank lines, and comment lines opening with |#|, are ignored */
	if ((Str::len(line) == 0) || (Str::get_first_char(line) == '#')) return;

	match_results mr = Regexp::create_mr();
	if (state->current_block == NULL) @<Syntax outside a colouring program@>
	else @<Syntax inside a colouring program@>;
	Regexp::dispose_of(&mr);
}
|
|
|
|
@ Outside a colouring program, you can do three things: start a program,
|
|
declare a reserved keyword, or set a key to a value.
|
|
|
|
@<Syntax outside a colouring program@> =
	if (Regexp::match(&mr, line, L"colouring {")) {
		if (pl->program) Errors::in_text_file("duplicate colouring program", tfp);
		colouring_language_block *top = Languages::new_block(NULL, WHOLE_LINE_CRULE_RUN);
		pl->program = top;
		state->current_block = top;
	} else if (Regexp::match(&mr, line, L"keyword (%C+) of (%c+?)")) {
		text_stream *word = Languages::text(mr.exp[0], tfp, FALSE);
		wchar_t word_colour = Languages::colour(mr.exp[1], tfp);
		Languages::reserved(pl, word, word_colour, tfp);
	} else if (Regexp::match(&mr, line, L"keyword (%C+)")) {
		text_stream *word = Languages::text(mr.exp[0], tfp, FALSE);
		Languages::reserved(pl, word, RESERVED_COLOUR, tfp);
	} else if (Regexp::match(&mr, line, L"(%c+) *: *(%c+?)")) {
		text_stream *key = mr.exp[0], *value = Str::duplicate(mr.exp[1]);

		/* work out which field the key refers to: at most one of these
		   three pointers ends up non-null */
		text_stream **text_slot = NULL;
		int *flag_slot = NULL;
		wchar_t *regexp_slot = NULL;

		if (Str::eq(key, I"Name")) text_slot = &(pl->language_name);
		else if (Str::eq(key, I"Details")) text_slot = &(pl->language_details);
		else if (Str::eq(key, I"Extension")) text_slot = &(pl->file_extension);
		else if (Str::eq(key, I"Line Comment")) text_slot = &(pl->line_comment);
		else if (Str::eq(key, I"Whole Line Comment")) text_slot = &(pl->whole_line_comment);
		else if (Str::eq(key, I"Multiline Comment Open")) text_slot = &(pl->multiline_comment_open);
		else if (Str::eq(key, I"Multiline Comment Close")) text_slot = &(pl->multiline_comment_close);
		else if (Str::eq(key, I"String Literal")) text_slot = &(pl->string_literal);
		else if (Str::eq(key, I"String Literal Escape")) text_slot = &(pl->string_literal_escape);
		else if (Str::eq(key, I"Character Literal")) text_slot = &(pl->character_literal);
		else if (Str::eq(key, I"Character Literal Escape")) text_slot = &(pl->character_literal_escape);
		else if (Str::eq(key, I"Binary Literal Prefix")) text_slot = &(pl->binary_literal_prefix);
		else if (Str::eq(key, I"Octal Literal Prefix")) text_slot = &(pl->octal_literal_prefix);
		else if (Str::eq(key, I"Hexadecimal Literal Prefix")) text_slot = &(pl->hexadecimal_literal_prefix);
		else if (Str::eq(key, I"Negative Literal Prefix")) text_slot = &(pl->negative_literal_prefix);
		else if (Str::eq(key, I"Shebang")) text_slot = &(pl->shebang);
		else if (Str::eq(key, I"Line Marker")) text_slot = &(pl->line_marker);
		else if (Str::eq(key, I"Before Named Paragraph Expansion")) text_slot = &(pl->before_macro_expansion);
		else if (Str::eq(key, I"After Named Paragraph Expansion")) text_slot = &(pl->after_macro_expansion);
		else if (Str::eq(key, I"Start Definition")) text_slot = &(pl->start_definition);
		else if (Str::eq(key, I"Prolong Definition")) text_slot = &(pl->prolong_definition);
		else if (Str::eq(key, I"End Definition")) text_slot = &(pl->end_definition);
		else if (Str::eq(key, I"Start Ifdef")) text_slot = &(pl->start_ifdef);
		else if (Str::eq(key, I"Start Ifndef")) text_slot = &(pl->start_ifndef);
		else if (Str::eq(key, I"End Ifdef")) text_slot = &(pl->end_ifdef);
		else if (Str::eq(key, I"End Ifndef")) text_slot = &(pl->end_ifndef);
		else if (Str::eq(key, I"C-Like")) flag_slot = &(pl->C_like);
		else if (Str::eq(key, I"Suppress Disclaimer")) flag_slot = &(pl->suppress_disclaimer);
		else if (Str::eq(key, I"Supports Namespaces")) flag_slot = &(pl->supports_namespaces);
		else if (Str::eq(key, I"Function Declaration Notation")) regexp_slot = pl->function_notation;
		else if (Str::eq(key, I"Type Declaration Notation")) regexp_slot = pl->type_notation;

		/* then parse the value as whatever type that field requires */
		if (text_slot) *text_slot = Languages::text(value, tfp, TRUE);
		else if (flag_slot) *flag_slot = Languages::boolean(value, tfp);
		else if (regexp_slot) Languages::regexp(regexp_slot, value, tfp);
		else Errors::in_text_file("unknown property name before ':'", tfp);
	} else {
		Errors::in_text_file("line in language definition illegible", tfp);
	}
|
|
|
|
@ Inside a colouring program, you can close the current block (which may be
|
|
the entire program), open a new block to apply to each character or to
|
|
runs of a given colour, or give an if-X-then-Y rule:
|
|
|
|
@<Syntax inside a colouring program@> =
|
|
if (Str::eq(line, I"}")) {
|
|
state->current_block = state->current_block->parent;
|
|
} else if (Regexp::match(&mr, line, L"characters {")) {
|
|
colouring_rule *rule = Languages::new_rule(state->current_block);
|
|
rule->execute_block =
|
|
Languages::new_block(state->current_block, CHARACTERS_CRULE_RUN);
|
|
state->current_block = rule->execute_block;
|
|
} else if (Regexp::match(&mr, line, L"characters in (%c+) {")) {
|
|
colouring_rule *rule = Languages::new_rule(state->current_block);
|
|
rule->execute_block =
|
|
Languages::new_block(state->current_block, CHARACTERS_IN_CRULE_RUN);
|
|
rule->execute_block->char_set = Languages::text(mr.exp[0], tfp, FALSE);
|
|
state->current_block = rule->execute_block;
|
|
} else if (Regexp::match(&mr, line, L"runs of (%c+) {")) {
|
|
colouring_rule *rule = Languages::new_rule(state->current_block);
|
|
wchar_t r = UNQUOTED_COLOUR;
|
|
if (Str::ne(mr.exp[0], I"unquoted")) r = Languages::colour(mr.exp[0], tfp);
|
|
rule->execute_block = Languages::new_block(state->current_block, (int) r);
|
|
state->current_block = rule->execute_block;
|
|
} else if (Regexp::match(&mr, line, L"instances of (%c+) {")) {
|
|
colouring_rule *rule = Languages::new_rule(state->current_block);
|
|
rule->execute_block = Languages::new_block(state->current_block, INSTANCES_CRULE_RUN);
|
|
rule->execute_block->run_instance = Languages::text(mr.exp[0], tfp, FALSE);
|
|
state->current_block = rule->execute_block;
|
|
} else if (Regexp::match(&mr, line, L"matches of (%c+) {")) {
|
|
colouring_rule *rule = Languages::new_rule(state->current_block);
|
|
rule->execute_block = Languages::new_block(state->current_block, MATCHES_CRULE_RUN);
|
|
Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
|
|
state->current_block = rule->execute_block;
|
|
} else if (Regexp::match(&mr, line, L"brackets in (%c+) {")) {
|
|
colouring_rule *rule = Languages::new_rule(state->current_block);
|
|
rule->execute_block = Languages::new_block(state->current_block, BRACKETS_CRULE_RUN);
|
|
Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
|
|
state->current_block = rule->execute_block;
|
|
} else {
|
|
int at = -1, quoted = FALSE;
|
|
for (int i=0; i<Str::len(line)-1; i++) {
|
|
if (Str::get_at(line, i) == '"') quoted = quoted?FALSE:TRUE;
|
|
if ((quoted) && (Str::get_at(line, i) == '\\')) i++;
|
|
if ((quoted == FALSE) &&
|
|
(Str::get_at(line, i) == '=') && (Str::get_at(line, i+1) == '>')) at = i;
|
|
}
|
|
if (at >= 0) {
|
|
TEMPORARY_TEXT(premiss)
|
|
TEMPORARY_TEXT(conclusion)
|
|
Str::substr(premiss, Str::start(line), Str::at(line, at));
|
|
Str::substr(conclusion, Str::at(line, at+2), Str::end(line));
|
|
Languages::parse_rule(state, premiss, conclusion, tfp);
|
|
DISCARD_TEXT(conclusion)
|
|
DISCARD_TEXT(premiss)
|
|
} else {
|
|
Errors::in_text_file("line in colouring block illegible", tfp);
|
|
}
|
|
}
|
|
|
|
@h Blocks.
|
|
These are code blocks of colouring instructions. A block whose |parent| is |NULL|
|
|
represents a complete program.
|
|
|
|
@d WHOLE_LINE_CRULE_RUN -1 /* This block applies to the whole snippet being coloured; these values share the |run| field with colours, which are character constants, and all are negative */
|
|
@d CHARACTERS_CRULE_RUN -2 /* This block applies to each character in turn */
|
|
@d CHARACTERS_IN_CRULE_RUN -3 /* This block applies to each character from a set in turn: see |char_set| */
|
|
@d INSTANCES_CRULE_RUN -4 /* This block applies to each instance in turn: see |run_instance| */
|
|
@d MATCHES_CRULE_RUN -5 /* This block applies to each match against a regexp in turn: see |match_regexp_text| */
|
|
@d BRACKETS_CRULE_RUN -6 /* This block applies to bracketed subexpressions in a regexp: see |match_regexp_text| */
|
|
|
|
=
|
|
/* A braced block of colouring rules; created only by |Languages::new_block|. */
typedef struct colouring_language_block {
|
|
struct linked_list *rules; /* of |colouring_rule| */
|
|
struct colouring_language_block *parent; /* or |NULL| for the topmost one */
|
|
int run; /* one of the |*_CRULE_RUN| values, or else a colour */
|
|
struct text_stream *run_instance; /* used only for |INSTANCES_CRULE_RUN|; otherwise |NULL| */
|
|
struct text_stream *char_set; /* used only for |CHARACTERS_IN_CRULE_RUN|; otherwise |NULL| */
|
|
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* used for |MATCHES_CRULE_RUN|, |BRACKETS_CRULE_RUN|; otherwise empty */
|
|
|
|
/* workspace during painting */
|
|
struct match_results mr; /* of a regular expression */
|
|
CLASS_DEFINITION
|
|
} colouring_language_block;
|
|
|
|
@ =
|
|
/* Create an empty block inside |within| (or a top-level block if that is
   |NULL|), applying to the run |r|: a |*_CRULE_RUN| value or a colour. */
colouring_language_block *Languages::new_block(colouring_language_block *within, int r) {
	colouring_language_block *fresh = CREATE(colouring_language_block);

	/* where it sits, and what it applies to */
	fresh->parent = within;
	fresh->run = r;
	fresh->rules = NEW_LINKED_LIST(colouring_rule);

	/* run-specific details are filled in by the caller where needed */
	fresh->char_set = NULL;
	fresh->run_instance = NULL;
	fresh->match_regexp_text[0] = 0;

	fresh->mr = Regexp::create_mr();
	return fresh;
}
|
|
|
|
@h Colouring Rules.
|
|
Each individual rule has the form: if a premiss, then a conclusion. It will be
|
|
applied to a snippet of text, and the premiss can test that, together with a
|
|
little context before it (where available).
|
|
|
|
Note that rules can be unconditional, in that the premiss always passes.
|
|
|
|
@d NOT_A_RULE_PREFIX 1 /* this isn't a prefix or suffix rule: the value set by |Languages::new_rule| */
|
|
@d UNSPACED_RULE_PREFIX 2 /* for |prefix P| */
|
|
@d SPACED_RULE_PREFIX 3 /* for |spaced prefix P| */
|
|
@d OPTIONALLY_SPACED_RULE_PREFIX 4 /* for |optionally spaced prefix P| */
|
|
@d UNSPACED_RULE_SUFFIX 5 /* for |suffix P| */
|
|
@d SPACED_RULE_SUFFIX 6 /* for |spaced suffix P| */
|
|
@d OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for |optionally spaced suffix P| */
|
|
|
|
@d MAX_ILDF_REGEXP_LENGTH 64 /* size, in |wchar_t| units, of each buffer holding a translated regexp */
|
|
|
|
=
|
|
/* A single "premiss => conclusion" rule; created only by |Languages::new_rule|,
   which also adds it to its block's list of rules. */
typedef struct colouring_rule {
|
|
/* the premiss: */
|
|
int sense; /* |FALSE| to negate the condition: flipped by each leading |not| */
|
|
wchar_t match_colour; /* for |coloured C|, or else |NOT_A_COLOUR| */
|
|
wchar_t match_keyword_of_colour; /* for |keyword C|, or else |NOT_A_COLOUR| */
|
|
struct text_stream *match_text; /* or length 0 to mean "anything" */
|
|
int match_prefix; /* one of the |*_RULE_PREFIX| values above */
|
|
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* for |matching /regexp/|, or else empty */
|
|
int number; /* for |number N| rules; 0 for others */
|
|
int number_of; /* for |number N of M| rules; 0 for others */
|
|
|
|
/* the conclusion: */
|
|
struct colouring_language_block *execute_block; /* or |NULL|, in which case... */
|
|
wchar_t set_to_colour; /* ...paint the snippet in this colour */
|
|
wchar_t set_prefix_to_colour; /* ...also paint this (same for suffix) */
|
|
int debug; /* ...or print debugging text to console */
|
|
|
|
/* workspace during painting */
|
|
int fix_position; /* where the prefix or suffix started */
|
|
struct match_results mr; /* of a regular expression */
|
|
CLASS_DEFINITION
|
|
} colouring_rule;
|
|
|
|
@ =
|
|
/* Create a rule at the end of the block |within|, which must not be |NULL|.
   The new rule is unconditional and concludes nothing until the caller
   fills in its details. */
colouring_rule *Languages::new_rule(colouring_language_block *within) {
	if (within == NULL) internal_error("rule outside block");
	colouring_rule *fresh = CREATE(colouring_rule);
	ADD_TO_LINKED_LIST(fresh, colouring_rule, within->rules);

	/* the premiss: by default, matches anything */
	fresh->sense = TRUE;
	fresh->match_text = NULL;
	fresh->match_prefix = NOT_A_RULE_PREFIX;
	fresh->match_colour = NOT_A_COLOUR;
	fresh->match_keyword_of_colour = NOT_A_COLOUR;
	fresh->match_regexp_text[0] = 0;
	fresh->number_of = 0;
	fresh->number = 0;

	/* the conclusion: by default, does nothing */
	fresh->execute_block = NULL;
	fresh->set_to_colour = NOT_A_COLOUR;
	fresh->set_prefix_to_colour = NOT_A_COLOUR;
	fresh->debug = FALSE;

	/* workspace */
	fresh->mr = Regexp::create_mr();
	fresh->fix_position = 0;
	return fresh;
}
|
|
|
|
@ =
|
|
/* Add a rule to the current block, given the texts either side of the |=>|.
   Both texts are trimmed of white space in place. */
void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
	text_stream *action, text_file_position *tfp) {
	match_results mr = Regexp::create_mr();
	colouring_rule *rule = Languages::new_rule(state->current_block);

	Str::trim_white_space(premiss);
	Str::trim_white_space(action);

	@<Parse the premiss@>;
	@<Parse the conclusion@>;

	Regexp::dispose_of(&mr);
}
|
|
|
|
@<Parse the premiss@> =
|
|
while (Regexp::match(&mr, premiss, L"not (%c+)")) {
|
|
rule->sense = (rule->sense)?FALSE:TRUE;
|
|
Str::clear(premiss); Str::copy(premiss, mr.exp[0]);
|
|
}
|
|
if (Regexp::match(&mr, premiss, L"number (%d+)")) {
|
|
rule->number = Str::atoi(mr.exp[0], 0);
|
|
} else if (Regexp::match(&mr, premiss, L"number (%d+) of (%d+)")) {
|
|
rule->number = Str::atoi(mr.exp[0], 0);
|
|
rule->number_of = Str::atoi(mr.exp[1], 0);
|
|
} else if (Regexp::match(&mr, premiss, L"keyword of (%c+)")) {
|
|
rule->match_keyword_of_colour = Languages::colour(mr.exp[0], tfp);
|
|
} else if (Regexp::match(&mr, premiss, L"keyword")) {
|
|
Errors::in_text_file("ambiguous: make it keyword of !reserved or \"keyword\"", tfp);
|
|
} else if (Regexp::match(&mr, premiss, L"prefix (%c+)")) {
|
|
rule->match_prefix = UNSPACED_RULE_PREFIX;
|
|
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
|
|
} else if (Regexp::match(&mr, premiss, L"matching (%c+)")) {
|
|
Languages::regexp(rule->match_regexp_text, mr.exp[0], tfp);
|
|
} else if (Regexp::match(&mr, premiss, L"spaced prefix (%c+)")) {
|
|
rule->match_prefix = SPACED_RULE_PREFIX;
|
|
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
|
|
} else if (Regexp::match(&mr, premiss, L"optionally spaced prefix (%c+)")) {
|
|
rule->match_prefix = OPTIONALLY_SPACED_RULE_PREFIX;
|
|
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
|
|
} else if (Regexp::match(&mr, premiss, L"suffix (%c+)")) {
|
|
rule->match_prefix = UNSPACED_RULE_SUFFIX;
|
|
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
|
|
} else if (Regexp::match(&mr, premiss, L"spaced suffix (%c+)")) {
|
|
rule->match_prefix = SPACED_RULE_SUFFIX;
|
|
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
|
|
} else if (Regexp::match(&mr, premiss, L"optionally spaced suffix (%c+)")) {
|
|
rule->match_prefix = OPTIONALLY_SPACED_RULE_SUFFIX;
|
|
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
|
|
} else if (Regexp::match(&mr, premiss, L"coloured (%c+)")) {
|
|
rule->match_colour = Languages::colour(mr.exp[0], tfp);
|
|
} else if (Str::len(premiss) > 0) {
|
|
rule->match_text = Languages::text(premiss, tfp, FALSE);
|
|
}
|
|
|
|
@<Parse the conclusion@> =
	if (Str::eq(action, I"{")) {
		/* the conclusion is a new block, which we now enter */
		colouring_language_block *inner =
			Languages::new_block(state->current_block, WHOLE_LINE_CRULE_RUN);
		rule->execute_block = inner;
		state->current_block = inner;
	} else if ((Regexp::match(&mr, action, L"(!%c+) on prefix")) ||
		(Regexp::match(&mr, action, L"(!%c+) on suffix"))) {
		/* the same field serves for both prefixes and suffixes */
		rule->set_prefix_to_colour = Languages::colour(mr.exp[0], tfp);
	} else if (Regexp::match(&mr, action, L"(!%c+) on both")) {
		wchar_t shared = Languages::colour(mr.exp[0], tfp);
		rule->set_to_colour = shared;
		rule->set_prefix_to_colour = shared;
	} else if (Str::get_first_char(action) == '!') {
		rule->set_to_colour = Languages::colour(action, tfp);
	} else if (Str::eq(action, I"debug")) {
		rule->debug = TRUE;
	} else {
		Errors::in_text_file("action after '=>' illegible", tfp);
	}
|
|
|
|
@h Reserved words.
|
|
Note that these can come in any colour, though usually it's |!reserved|.
|
|
|
|
=
|
|
/* One reserved word of a language; created only by |Languages::reserved|. */
typedef struct reserved_word {
|
|
struct text_stream *word; /* a private copy of the word's text */
|
|
int colour; /* one of the |*_COLOUR| character values, stored as an |int| */
|
|
CLASS_DEFINITION
|
|
} reserved_word;
|
|
|
|
/* Declare |W| as a reserved word of |pl|, to be painted in colour |C|. An
   error is reported if the word is already reserved, but it is added again
   all the same. */
reserved_word *Languages::reserved(programming_language *pl, text_stream *W, wchar_t C,
	text_file_position *tfp) {
	reserved_word *known;
	LOOP_OVER_LINKED_LIST(known, reserved_word, pl->reserved_words) {
		if (Str::eq(known->word, W))
			Errors::in_text_file("duplicate reserved word", tfp);
	}

	reserved_word *entry = CREATE(reserved_word);
	entry->colour = (int) C;
	entry->word = Str::duplicate(W);
	ADD_TO_LINKED_LIST(entry, reserved_word, pl->reserved_words);

	/* the word is recorded in the language's keyword hash table as well */
	Analyser::mark_reserved_word(&(pl->built_in_keywords), entry->word, (int) C);
	return entry;
}
|
|
|
|
@h Expressions.
|
|
Language definition files have four types of data: colours, booleans, text,
and regular expressions. Colours first. Note that two pseudo-colours are used
above which cannot be expressed in the syntax of this file.
|
|
|
|
@d DEFINITION_COLOUR 'd' /* written |!definition| */
|
|
@d FUNCTION_COLOUR 'f' /* written |!function| */
|
|
@d RESERVED_COLOUR 'r' /* written |!reserved|: the colour of a |keyword| declared without one */
|
|
@d ELEMENT_COLOUR 'e' /* written |!element| */
|
|
@d IDENTIFIER_COLOUR 'i' /* written |!identifier| */
|
|
@d CHARACTER_COLOUR 'c' /* written |!character| */
|
|
@d CONSTANT_COLOUR 'n' /* written |!constant| */
|
|
@d STRING_COLOUR 's' /* written |!string| */
|
|
@d PLAIN_COLOUR 'p' /* written |!plain|: also what |Languages::colour| returns after an error */
|
|
@d EXTRACT_COLOUR 'x' /* written |!extract| */
|
|
@d COMMENT_COLOUR '!' /* written |!comment| */
|
|
@d NEWLINE_COLOUR '\n' /* no name for this is recognised by |Languages::colour| */
|
|
|
|
@d NOT_A_COLOUR ' ' /* pseudo-colour: the value of a rule's colour fields when unset */
|
|
@d UNQUOTED_COLOUR '_' /* pseudo-colour: used for blocks opened with |runs of unquoted| */
|
|
|
|
=
|
|
/* Translate a colour name such as |!string| into its |*_COLOUR| value. If the
   name is not recognised, an error is reported and |PLAIN_COLOUR| returned. */
wchar_t Languages::colour(text_stream *T, text_file_position *tfp) {
	if (Str::get_first_char(T) != '!') {
		Errors::in_text_file("colour names must begin with !", tfp);
		return PLAIN_COLOUR;
	}

	wchar_t result = PLAIN_COLOUR;
	int recognised = TRUE;
	if (Str::eq(T, I"!string")) result = STRING_COLOUR;
	else if (Str::eq(T, I"!function")) result = FUNCTION_COLOUR;
	else if (Str::eq(T, I"!definition")) result = DEFINITION_COLOUR;
	else if (Str::eq(T, I"!reserved")) result = RESERVED_COLOUR;
	else if (Str::eq(T, I"!element")) result = ELEMENT_COLOUR;
	else if (Str::eq(T, I"!identifier")) result = IDENTIFIER_COLOUR;
	else if (Str::eq(T, I"!character")) result = CHARACTER_COLOUR;
	else if (Str::eq(T, I"!constant")) result = CONSTANT_COLOUR;
	else if (Str::eq(T, I"!plain")) result = PLAIN_COLOUR;
	else if (Str::eq(T, I"!extract")) result = EXTRACT_COLOUR;
	else if (Str::eq(T, I"!comment")) result = COMMENT_COLOUR;
	else recognised = FALSE;

	if (recognised == FALSE) Errors::in_text_file("no such !colour", tfp);
	return result;
}
|
|
|
|
@ A boolean must be written as |true| or |false|.
|
|
|
|
=
|
|
/* Translate |true| or |false| into |TRUE| or |FALSE|. Anything else is
   reported as an error, and read as |FALSE|. */
int Languages::boolean(text_stream *T, text_file_position *tfp) {
	if (Str::eq(T, I"true")) return TRUE;
	if (Str::eq(T, I"false") == FALSE)
		Errors::in_text_file("must be true or false", tfp);
	return FALSE;
}
|
|
|
|
@ In text, |\n| represents a newline, |\s| a space and |\t| a tab. Spaces
|
|
can be given in the ordinary way inside a text in any case. |\\| is a
|
|
literal backslash.
|
|
|
|
=
|
|
/* Translate the text |T| from a definition file into the text it represents,
   returned as a new stream. |T| may be a bare word or enclosed in double
   quotes. Bare words containing spaces are reported as errors unless |allow|
   is set, and bare words which are reserved words of the definition syntax
   are always reported. */
text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow) {
	text_stream *V = Str::new();
	if (Str::len(T) <= 0) return V;

	int from = 0, to = Str::len(T)-1;
	int bareword = TRUE, spaced = FALSE;
	/* if the text is quoted, look only at what lies between the quotes */
	if ((to > from) &&
		(Str::get_at(T, from) == '"') && (Str::get_at(T, to) == '"')) {
		bareword = FALSE; from++; to--;
	}

	for (int i=from; i<=to; i++) {
		wchar_t c = Str::get_at(T, i);
		if (c == ' ') spaced = TRUE;

		/* a recognised backslash escape produces one character from two */
		wchar_t unescaped = 0;
		if (c == '\\') {
			switch (Str::get_at(T, i+1)) {
				case 'n': unescaped = '\n'; break;
				case 's': unescaped = ' '; break;
				case 't': unescaped = '\t'; break;
				case '\\': unescaped = '\\'; break;
				case '"': if (bareword == FALSE) unescaped = '"'; break;
			}
		}
		if (unescaped) {
			PUT_TO(V, unescaped);
			i++;
			continue;
		}

		/* otherwise the character is copied unless it is illegal here */
		if (bareword == FALSE) {
			if (c == '"')
				Errors::in_text_file(
					"backslash needed before internal double-quotation mark", tfp);
			else PUT_TO(V, c);
		} else if ((c == '!') && (i == from)) {
			Errors::in_text_file(
				"a literal starting with ! must be in double-quotation marks", tfp);
		} else if (c == '/') {
			Errors::in_text_file(
				"forward slashes can only be used in quoted strings", tfp);
		} else if (c == '"') {
			Errors::in_text_file(
				"double-quotation marks can only be used in quoted strings", tfp);
		} else {
			PUT_TO(V, c);
		}
	}

	if ((bareword) && (spaced) && (allow == FALSE)) {
		TEMPORARY_TEXT(err)
		WRITE_TO(err, "'%S' seems to be literal text, but if so it needs double-quotation marks", T);
		Errors::in_text_file_S(err, tfp);
		DISCARD_TEXT(err)
	}

	if (bareword) {
		/* words with a meaning in the definition syntax must be quoted to be used literally */
		int rw = (
			(Str::eq(V, I"both")) || (Str::eq(V, I"brackets")) ||
			(Str::eq(V, I"characters")) || (Str::eq(V, I"coloured")) ||
			(Str::eq(V, I"colouring")) || (Str::eq(V, I"debug")) ||
			(Str::eq(V, I"false")) || (Str::eq(V, I"in")) ||
			(Str::eq(V, I"instances")) || (Str::eq(V, I"keyword")) ||
			(Str::eq(V, I"matches")) || (Str::eq(V, I"matching")) ||
			(Str::eq(V, I"not")) || (Str::eq(V, I"of")) ||
			(Str::eq(V, I"on")) || (Str::eq(V, I"optionally")) ||
			(Str::eq(V, I"prefix")) || (Str::eq(V, I"runs")) ||
			(Str::eq(V, I"spaced")) || (Str::eq(V, I"suffix")) ||
			(Str::eq(V, I"true")) || (Str::eq(V, I"unquoted")))?TRUE:FALSE;
		if (rw) {
			TEMPORARY_TEXT(err)
			WRITE_TO(err, "'%S' is a reserved word, so you should put it in double-quotation marks", V);
			Errors::in_text_file_S(err, tfp);
			DISCARD_TEXT(err)
		}
	}
	return V;
}
|
|
|
|
@ And regular expressions.
|
|
|
|
=
|
|
/* Translate the regular expression |T|, which must be written between slashes,
   from the notation of definition files into the |%|-escaped notation of the
   patterns used elsewhere in this section, writing the result to |write_to|.
   That buffer must have room for |MAX_ILDF_REGEXP_LENGTH| characters. The
   result is always null-terminated: an expression too long to fit is reported
   as an error and truncated. */
void Languages::regexp(wchar_t *write_to, text_stream *T, text_file_position *tfp) {
	if (write_to == NULL) internal_error("no buffer");
	write_to[0] = 0;
	if (Str::len(T) > 0) {
		int from = 0, to = Str::len(T)-1, x = 0;
		if ((to > from) &&
			(Str::get_at(T, from) == '/') && (Str::get_at(T, to) == '/')) {
			from++; to--;
			for (int i=from; i<=to; i++) {
				wchar_t c = Str::get_at(T, i);
				if (c == '\\') {
					/* a backslash escape: translate it, and skip the character after */
					wchar_t w = Str::get_at(T, i+1);
					switch (w) {
						case '\\': x = Languages::add_to_regexp(write_to, x, w); break;
						case 'd': x = Languages::add_escape_to_regexp(write_to, x, 'd'); break;
						case 't': x = Languages::add_escape_to_regexp(write_to, x, 't'); break;
						case 's': x = Languages::add_to_regexp(write_to, x, ' '); break;
						case 'S': x = Languages::add_escape_to_regexp(write_to, x, 'C'); break;
						case '"': x = Languages::add_escape_to_regexp(write_to, x, 'q'); break;
						default: x = Languages::add_escape_to_regexp(write_to, x, w); break;
					}
					i++;
				} else if (c == '.') {
					x = Languages::add_escape_to_regexp(write_to, x, 'c');
				} else if (c == '%') {
					/* a literal percent sign has to be escaped in the output notation */
					x = Languages::add_escape_to_regexp(write_to, x, '%');
				} else {
					x = Languages::add_to_regexp(write_to, x, c);
				}
			}
		} else {
			Errors::in_text_file(
				"the expression to match must be in slashes '/'", tfp);
		}
		if (x >= MAX_ILDF_REGEXP_LENGTH) {
			Errors::in_text_file(
				"the expression to match is too long", tfp);
			x = MAX_ILDF_REGEXP_LENGTH - 1; /* truncate to leave room for the terminator */
		}
		/* the characters were added without a terminator, so without this a
		   buffer being reused would keep the tail of its previous contents */
		write_to[x] = 0;
	}
}
|
|
|
|
/* Write |c| at position |i| of the buffer and return the next position. If the
   buffer is already full, |c| is discarded and |i| is returned unchanged. No
   terminator is written. */
int Languages::add_to_regexp(wchar_t *write_to, int i, wchar_t c) {
	if (i >= MAX_ILDF_REGEXP_LENGTH) return i;
	write_to[i] = c;
	return i + 1;
}
|
|
|
|
/* Write the two-character escape |%c| starting at position |i| of the buffer,
   returning the position after whatever could be fitted in. */
int Languages::add_escape_to_regexp(wchar_t *write_to, int i, wchar_t c) {
	int after_marker = Languages::add_to_regexp(write_to, i, '%');
	return Languages::add_to_regexp(write_to, after_marker, c);
}
|